library(tidyverse)
library(readxl)
library(ggforce)
library(concaveman)
library(knitr)
library(olsrr)
library(ranger)
library(Metrics)
library(mgcv)
library(caret)
library(patchwork)
library(glmnet)
library(broom)
library(RColorBrewer)
options(scipen = 999)
set.seed(3630)
# Strike Zone GG Object
geom_zone <- function(top = 11/3, bottom = 3/2, linecolor = "black"){
geom_rect(xmin = -.7083, xmax = .7083, ymin = bottom, ymax = top,
alpha = 0, color = linecolor, linewidth = 0.75)
}
# c(0, 0, -.25, -.5, -.25))
# Home Plate GG Object
geom_plate <- function(pov = "pitcher"){
df <- case_when(
pov == "pitcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, .25, .5, .25)),
pov == "catcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, -.25, -.5, -.25))
)
g <- geom_polygon(data = df, aes(x = x, y = y), fill = "white", color = "black", linewidth = 1.25)
g
}
# Barrel Function
is.barrel <- function(LA, EV){
upper <- 1.11*EV - 78.89
lower <- -EV + 124
outcome <- (LA >= lower) & (LA <= upper) & (EV >= 98) & (LA >= 8) & (LA <= 50)
outcome <- replace_na(outcome, FALSE)
outcome
}
# Normal Name Changer
swap_names <- function(name) {
parts <- strsplit(name, ", ")[[1]]
if (length(parts) == 2) {
return(paste(rev(parts), collapse = " "))
} else {
return(name)
}
}
seasonal <- read_csv("CSVs/season_stats.csv")
pitchers <- read_csv("CSVs/pitcher_comps.csv")
arsenal <- read_csv("CSVs/arsenal.csv")
empty <- read_csv("CSVs/bases_empty.csv")
whiff <- pitchers %>%
mutate(whiff = description == "swinging_strike",
whiff = as.character(whiff)) %>%
filter(pitch_type != "NA",
pitch_type != "PO")
LHP <- read_csv("CSVs/lhp_pitches.csv") %>%
select(-...1) %>%
filter(!is.na(pitch_type)) %>%
mutate(pitch_type = str_replace(pitch_type, "CS", "CU"),
pitch_name = str_replace(pitch_name, "Slow Curve", "Curveball"),
pitch_type = str_replace(pitch_type, "KC", "CU"),
pitch_name = str_replace(pitch_name, "Knuckle Curve", "Curveball"))
RHP <- read_csv("CSVs/rhp_pitches.csv") %>%
select(-...1)
whiff_l <- LHP %>%
mutate(whiff = description == "swinging_strike",
whiff = as.character(whiff))
all <- read_csv("CSVs/all_pitches.csv") %>%
mutate(distance_sweet = sqrt(((plate_x - 0.85)^2)+((plate_z - 1.55)^2)))
# Model Data (Pitch = Slider, Pitching Hand = Right)
model_data <- arsenal %>%
filter(pitch_type == "SL",
pitch_hand == "R") %>%
mutate(ovr_break = sqrt(pitcher_break_x^2 + pitcher_break_z^2))
# Simple Linear Regression
lm_simple <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z +
pitch_usage + ovr_break,
data = model_data)
# Overview of all model combinations
model_all <- ols_step_all_possible(lm_simple)
# Backwards Elimination
lm_simple %>% ols_step_backward_p(penter = 0.2)
##
##
## Stepwise Summary
## -----------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## -----------------------------------------------------------------------------
## 0 Full Model -879.334 -848.821 -1829.725 0.05444 0.03714
## 1 spin_rate -881.055 -854.356 -1831.498 0.05365 0.03927
## -----------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.232 RMSE 0.064
## R-Squared 0.054 MSE 0.004
## Adj. R-Squared 0.039 Coef. Var 23.574
## Pred R-Squared 0.020 AIC -881.055
## MAE 0.049 SBC -854.356
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.077 5 0.015 3.73 0.0027
## Residual 1.356 329 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------
## (Intercept) 0.888 0.181 4.899 0.000 0.531 1.244
## pitch_speed -0.006 0.002 -0.259 -3.242 0.001 -0.009 -0.002
## pitcher_break_x 0.007 0.004 0.399 1.828 0.068 0.000 0.014
## pitcher_break_z 0.032 0.017 2.198 1.814 0.071 -0.003 0.066
## pitch_usage 0.000 0.000 -0.105 -1.922 0.055 -0.001 0.000
## ovr_break -0.035 0.018 -2.597 -1.962 0.051 -0.070 0.000
## --------------------------------------------------------------------------------------------
# Stepwise Selection
lm_simple %>% ols_step_both_p(prem = 0.15, pent = 0.15)
##
##
## Stepwise Summary
## ----------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ----------------------------------------------------------------------------------
## 0 Base Model -872.581 -864.953 -1823.341 0.00000 0.00000
## 1 pitch_usage (+) -875.048 -863.605 -1825.823 0.01324 0.01028
## 2 pitch_speed (+) -877.136 -861.879 -1827.881 0.02521 0.01934
## 3 ovr_break (+) -881.609 -862.538 -1832.217 0.04387 0.03520
## ----------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.209 RMSE 0.064
## R-Squared 0.044 MSE 0.004
## Adj. R-Squared 0.035 Coef. Var 23.624
## Pred R-Squared 0.021 AIC -881.609
## MAE 0.050 SBC -862.538
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.063 3 0.021 5.062 0.0019
## Residual 1.370 331 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## ----------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## ----------------------------------------------------------------------------------------
## (Intercept) 0.861 0.176 4.887 0.000 0.514 1.207
## pitch_usage -0.001 0.000 -0.121 -2.249 0.025 -0.001 0.000
## pitch_speed -0.006 0.002 -0.246 -3.236 0.001 -0.009 -0.002
## ovr_break -0.003 0.001 -0.193 -2.541 0.012 -0.005 -0.001
## ----------------------------------------------------------------------------------------
# New Model
lm1 <- lm(xwOBA ~
ovr_break + pitch_usage + pitch_speed,
data = model_data)
# Interaction Linear Regression
lm_interact <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z + pitch_usage +
ovr_break +
pitch_speed*spin_rate + pitch_speed*pitch_usage + pitch_speed*ovr_break +
pitch_speed*pitcher_break_x + pitch_speed*pitcher_break_z +
spin_rate*ovr_break + spin_rate*pitcher_break_x + spin_rate*pitcher_break_z +
pitch_usage*ovr_break + pitch_usage*spin_rate,
data = model_data)
# model_interact_all <- ols_step_all_possible(lm_interact)
# Stepwise Selection
lm_interact %>% ols_step_both_p(pent = 0.15, prem = 0.05)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ---------------------------------------------------------------------------------------------------
## 0 Base Model -872.581 -864.953 -1826.673 0.00000 0.00000
## 1 pitch_speed:pitch_usage (+) -875.879 -864.437 -1831.752 0.01569 0.01273
## 2 pitcher_break_z (+) -874.227 -858.970 -1831.890 0.01671 0.01079
## 3 pitch_speed:pitch_usage (-) -870.698 -859.256 -1826.584 0.00035 -0.00265
## 4 pitch_speed (+) -877.840 -862.584 -1835.493 0.02726 0.02140
## 5 pitch_usage (+) -881.134 -862.063 -1840.555 0.04251 0.03383
## 6 pitch_usage:ovr_break (+) -882.750 -859.865 -1843.940 0.05279 0.04131
## 7 ovr_break (+) -882.634 -855.935 -1845.597 0.05810 0.04379
## 8 pitcher_break_z (-) -884.460 -861.575 -1845.642 0.05761 0.04619
## 9 spin_rate (+) -882.608 -855.909 -1845.571 0.05803 0.04371
## 10 ovr_break (-) -874.597 -851.713 -1835.823 0.02946 0.01769
## 11 pitch_speed:ovr_break (+) -882.165 -855.466 -1845.130 0.05678 0.04245
## 12 pitch_usage:ovr_break (-) -879.449 -856.564 -1840.653 0.04341 0.03181
## 13 pitcher_break_x (+) -877.639 -850.940 -1840.627 0.04395 0.02942
## 14 spin_rate (-) -879.454 -856.569 -1840.658 0.04342 0.03183
## 15 pitch_speed:pitcher_break_z (+) -880.623 -853.924 -1843.596 0.05243 0.03803
## ---------------------------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.229 RMSE 0.064
## R-Squared 0.052 MSE 0.004
## Adj. R-Squared 0.038 Coef. Var 23.589
## Pred R-Squared 0.019 AIC -880.623
## MAE 0.049 SBC -853.924
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.075 5 0.015 3.641 0.0032
## Residual 1.358 329 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------------------
## (Intercept) 0.729 0.155 4.705 0.000 0.424 1.034
## pitch_speed -0.004 0.002 -0.179 -2.577 0.010 -0.007 -0.001
## pitch_usage -0.001 0.000 -0.106 -1.946 0.053 -0.001 0.000
## pitcher_break_x 0.007 0.004 0.404 1.778 0.076 -0.001 0.014
## pitch_speed:ovr_break 0.000 0.000 -2.250 -1.904 0.058 -0.001 0.000
## pitch_speed:pitcher_break_z 0.000 0.000 1.935 1.768 0.078 0.000 0.001
## --------------------------------------------------------------------------------------------------------
# Output removes ALL interactions for p < 0.05
# Keeps same as simple LM pitcher_break_z + pitch_speed + pitch_usage
lm_interact %>% ols_step_both_p(pent = 0.15, prem = 0.10)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ---------------------------------------------------------------------------------------------------
## 0 Base Model -872.581 -864.953 -1826.673 0.00000 0.00000
## 1 pitch_speed:pitch_usage (+) -875.879 -864.437 -1831.752 0.01569 0.01273
## 2 pitcher_break_z (+) -874.227 -858.970 -1831.890 0.01671 0.01079
## 3 pitch_speed:pitch_usage (-) -870.698 -859.256 -1826.584 0.00035 -0.00265
## 4 pitch_speed (+) -877.840 -862.584 -1835.493 0.02726 0.02140
## 5 pitch_usage (+) -881.134 -862.063 -1840.555 0.04251 0.03383
## 6 pitch_usage:ovr_break (+) -882.750 -859.865 -1843.940 0.05279 0.04131
## 7 ovr_break (+) -882.634 -855.935 -1845.597 0.05810 0.04379
## 8 pitcher_break_z (-) -884.460 -861.575 -1845.642 0.05761 0.04619
## 9 spin_rate (+) -882.608 -855.909 -1845.571 0.05803 0.04371
## 10 ovr_break (-) -874.597 -851.713 -1835.823 0.02946 0.01769
## 11 pitch_speed:ovr_break (+) -882.165 -855.466 -1845.130 0.05678 0.04245
## 12 pitch_usage:ovr_break (-) -879.449 -856.564 -1840.653 0.04341 0.03181
## 13 pitcher_break_x (+) -877.639 -850.940 -1840.627 0.04395 0.02942
## 14 spin_rate (-) -879.454 -856.569 -1840.658 0.04342 0.03183
## 15 pitch_speed:pitcher_break_z (+) -880.623 -853.924 -1843.596 0.05243 0.03803
## ---------------------------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.229 RMSE 0.064
## R-Squared 0.052 MSE 0.004
## Adj. R-Squared 0.038 Coef. Var 23.589
## Pred R-Squared 0.019 AIC -880.623
## MAE 0.049 SBC -853.924
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.075 5 0.015 3.641 0.0032
## Residual 1.358 329 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------------------
## (Intercept) 0.729 0.155 4.705 0.000 0.424 1.034
## pitch_speed -0.004 0.002 -0.179 -2.577 0.010 -0.007 -0.001
## pitch_usage -0.001 0.000 -0.106 -1.946 0.053 -0.001 0.000
## pitcher_break_x 0.007 0.004 0.404 1.778 0.076 -0.001 0.014
## pitch_speed:ovr_break 0.000 0.000 -2.250 -1.904 0.058 -0.001 0.000
## pitch_speed:pitcher_break_z 0.000 0.000 1.935 1.768 0.078 0.000 0.001
## --------------------------------------------------------------------------------------------------------
# New Model with Interactions (p -value < 0.10 threshhold)
lm2 <- lm(xwOBA ~
pitch_speed + pitch_usage +
pitch_speed*ovr_break + pitch_speed*pitcher_break_z,
data = model_data)
# Trimmed Data
model_results <- model_data %>%
select(first_name, last_name,
pitch_speed, pitch_usage, pitcher_break_z, ovr_break, xwOBA)
# Comparing Model Predictions
# lm1 = simple
# lm2 = interactions
model_results <- model_results %>%
mutate(lm1 = predict(lm1, model_results)) %>%
mutate(lm2 = predict(lm2, model_results))
# R and RMSE of Simple Linear Model
with(model_results, cor(xwOBA, lm1))
## [1] 0.2094431
with(model_results, rmse(xwOBA, lm1))
## [1] 0.06394718
# R and RMSE of Interactions Linear Model
with(model_results, cor(xwOBA, lm2))
## [1] 0.2151546
with(model_results, rmse(xwOBA, lm2))
## [1] 0.06386603
model_results %>%
select(xwOBA, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = xwOBA, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
theme_classic() +
labs(title = "Linear Models for RHP",
x = "Observed",
y = "Predicted",
color = "Model")
# Model Data (Pitch = Slider, Pitching Hand = Left)
model_data_l <- arsenal %>%
filter(pitch_type == "SL",
pitch_hand == "L") %>%
mutate(ovr_break = sqrt(pitcher_break_x^2 + pitcher_break_z^2))
# Simple Linear Regression
lm_simple_l <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z +
pitch_usage + ovr_break,
data = model_data_l)
# Overview of all model combinations
model_all_l <- ols_step_all_possible(lm_simple_l)
# Backwards Elimination
lm_simple_l %>% ols_step_backward_p(penter = 0.15)
##
##
## Stepwise Summary
## ------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ------------------------------------------------------------------------------
## 0 Full Model -205.439 -184.518 -491.033 0.04617 -0.01471
## 1 pitch_speed -207.131 -188.825 -492.913 0.04325 -0.00710
## 2 spin_rate -208.977 -193.287 -494.917 0.04180 0.00187
## 3 pitch_usage -209.989 -196.914 -496.149 0.03238 0.00245
## ------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## -----------------------------------------------------------------
## R 0.180 RMSE 0.081
## R-Squared 0.032 MSE 0.007
## Adj. R-Squared 0.002 Coef. Var 29.106
## Pred R-Squared -0.036 AIC -209.989
## MAE 0.057 SBC -196.914
## -----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.022 3 0.007 1.082 0.3605
## Residual 0.670 97 0.007
## Total 0.692 100
## -------------------------------------------------------------------
##
## Parameter Estimates
## -------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## -------------------------------------------------------------------------------------------
## (Intercept) 0.336 0.068 4.936 0.000 0.201 0.471
## pitcher_break_x -0.013 0.009 -0.653 -1.425 0.157 -0.031 0.005
## pitcher_break_z -0.073 0.046 -4.509 -1.604 0.112 -0.164 0.017
## ovr_break 0.073 0.046 4.835 1.568 0.120 -0.019 0.165
## -------------------------------------------------------------------------------------------
# Stepwise Selection
# lm_simple_l %>% ols_step_both_p(prem = 0.15, pent = 0.15)
# New Model
lm1_l <- lm(xwOBA ~
pitcher_break_x + pitcher_break_z +
ovr_break,
data = model_data_l)
# Model Data (Pitch = Slider, Pitching Hand = Left)
# Interaction Linear Regression
lm_interact_l <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z + pitch_usage +
ovr_break +
pitch_speed*spin_rate + pitch_speed*pitch_usage + pitch_speed*ovr_break +
pitch_speed*pitcher_break_x + pitch_speed*pitcher_break_z +
spin_rate*ovr_break + spin_rate*pitcher_break_x + spin_rate*pitcher_break_z +
pitch_usage*ovr_break + pitch_usage*spin_rate,
data = model_data_l)
# model_interact_all <- ols_step_all_possible(lm_interact)
# Stepwise Selection
# lm_interact_l %>% ols_step_both_p(pent = 0.15, prem = 0.05)
# Output removes ALL interactions for p < 0.05
# Keeps same as simple LM pitcher_break_z + pitch_speed + pitch_usage
# lm_interact_l %>% ols_step_both_p(pent = 0.20, prem = 0.05)
# New Model with Interactions (p -value < 0.10 threshhold)
lm2_l <- lm(xwOBA ~
spin_rate*pitcher_break_z,
data = model_data_l)
# Trimmed Data
model_results_l <- model_data_l %>%
select(first_name, last_name,
pitch_speed, pitch_usage, pitcher_break_z, ovr_break, xwOBA)
# Comparing Model Predictions
# lm1 = simple
# lm2 = interactions
model_results_l <- model_results_l %>%
mutate(lm1 = predict(lm1, model_results_l)) %>%
mutate(lm2 = predict(lm2, model_results_l))
# R and RMSE of Simple Linear Model
with(model_results_l, cor(xwOBA, lm1))
## [1] 0.04569634
with(model_results_l, rmse(xwOBA, lm1))
## [1] 0.08383695
# R and RMSE of Interactions Linear Model
with(model_results_l, cor(xwOBA, lm2))
## [1] 0.02805347
with(model_results_l, rmse(xwOBA, lm2))
## [1] 0.08418886
model_results_l %>%
select(xwOBA, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = xwOBA, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
theme_classic() +
labs(title = "Linear Models for LHP",
x = "Observed",
y = "Predicted",
color = "Model")
# Pitch by Pitch Data (Sliders)
pitches <- pitchers %>%
filter(pitch_type == "SL") %>%
mutate(pfx_x = pfx_x*12,
pfx_z = pfx_z*12,
ovr_break = round(sqrt(pfx_x^2 + pfx_z^2), 3))
# Simple Linear Regression
lm_pitches <- lm(delta_run_exp ~
release_speed + release_spin_rate + pfx_x + pfx_z +
ovr_break + release_extension,
data = pitches)
# Stepwise Selection
# lm_pitches %>% ols_step_both_p(prem = 0.25, pent = 0.15)
# New Model
lm1_pitches <- lm(delta_run_exp ~
release_spin_rate + pfx_x + release_extension,
data = pitches)
# Interaction Linear Regression
lm_interact_pitches <- lm(delta_run_exp ~
release_speed + release_spin_rate + pfx_x + pfx_z +
ovr_break + release_extension +
release_speed*release_spin_rate + release_speed*pfx_x +
release_speed*pfx_z + release_speed*ovr_break +
release_speed*release_extension +
release_spin_rate*pfx_x + release_spin_rate*pfx_z +
release_spin_rate*ovr_break + release_spin_rate*release_extension +
release_extension*pfx_x + release_extension*pfx_z + release_extension*ovr_break,
data = pitches)
# model_interact_all <- ols_step_all_possible(lm_interact)
# Stepwise Selection
lm_interact_pitches %>% ols_step_both_p(pent = 0.15, prem = 0.15)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ---------------------------------------------------------------------------------------------
## 0 Base Model -72.271 -59.891 -10299.956 0.00000 0.00000
## 1 release_spin_rate:pfx_x (+) -73.773 -55.203 -10301.429 0.00097 0.00069
## 2 release_extension (+) -73.562 -48.803 -10301.189 0.00147 0.00091
## 3 ovr_break (+) -73.409 -42.460 -10301.005 0.00198 0.00115
## 4 release_speed (+) -71.418 -34.280 -10298.986 0.00198 0.00087
## 5 ovr_break (-) -71.620 -40.671 -10299.220 0.00148 0.00065
## 6 pfx_x (+) -71.805 -34.667 -10299.372 0.00209 0.00098
## 7 release_extension (-) -72.690 -41.741 -10300.287 0.00178 0.00095
## 8 pfx_z (+) -70.939 -33.800 -10298.508 0.00185 0.00074
## 9 pfx_x (-) -70.480 -39.531 -10298.082 0.00117 0.00033
## 10 release_speed:pfx_z (+) -74.421 -37.282 -10301.980 0.00281 0.00170
## 11 release_spin_rate:pfx_x (-) -75.339 -44.391 -10302.931 0.00251 0.00168
## 12 release_spin_rate (+) -75.115 -37.976 -10302.672 0.00300 0.00190
## 13 pfx_z (-) -71.481 -40.532 -10299.081 0.00144 0.00061
## ---------------------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ------------------------------------------------------------------
## R 0.038 RMSE 0.239
## R-Squared 0.001 MSE 0.057
## Adj. R-Squared 0.001 Coef. Var -3503.793
## Pred R-Squared -0.001 AIC -71.481
## MAE 0.118 SBC -40.532
## ------------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## --------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## --------------------------------------------------------------------
## Regression 0.298 3 0.099 1.736 0.1574
## Residual 206.296 3600 0.057
## Total 206.595 3603
## --------------------------------------------------------------------
##
## Parameter Estimates
## -----------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## -----------------------------------------------------------------------------------------------
## (Intercept) 0.095 0.129 0.738 0.461 -0.158 0.348
## release_speed -0.002 0.002 -0.022 -1.209 0.227 -0.005 0.001
## release_spin_rate 0.000 0.000 0.026 1.325 0.185 0.000 0.000
## release_speed:pfx_z 0.000 0.000 -0.012 -0.583 0.560 0.000 0.000
## -----------------------------------------------------------------------------------------------
lm2_pitches <- lm(delta_run_exp ~
release_spin_rate*pfx_x + release_extension,
data = pitches)
model_results_pitches <- pitches %>%
select(delta_run_exp, release_speed, release_spin_rate, pfx_x, pfx_z,
release_extension) %>%
mutate(lm1 = predict(lm1_pitches, pitches),
lm2 = predict(lm2_pitches, pitches))
# R and RMSE of Simple Linear Model
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.04470764
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.2391844
# R and RMSE of Interaction Linear Model
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm2))
## [1] 0.04598308
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm2))
## [1] 0.2391706
# Graph
model_results_pitches %>%
select(delta_run_exp, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = delta_run_exp, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
theme_classic() +
labs(title = "Linear Models for Pitch-by-Pitch Data",
subtitle = "Predicting Run Expectancy Added",
caption = "Pitchers: Scherzer, Taillon, Keller, Manoah, Gallen, Garcia, Gray",
x = "Observed",
y = "Predicted",
color = "Model")
model_results_pitches_2 <- pitches %>%
select(delta_run_exp, release_speed, release_spin_rate, pfx_x, pfx_z,
release_extension, ID) %>%
mutate(lm1 = predict(lm1_pitches, pitches),
lm2 = predict(lm2_pitches, pitches))
# Graph
model_results_pitches_2 %>%
select(ID,delta_run_exp, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = delta_run_exp, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
facet_wrap(~ ID, ncol = 1) +
theme_classic() +
labs(title = "Linear Models for Pitch-by-Pitch Data",
subtitle = "Predicting Run Expectancy Added",
caption = "Pitchers: Scherzer, Taillon, Keller, Manoah, Gallen, Garcia, Gray",
x = "Observed",
y = "Predicted",
color = "Model")
# Correlations
model_results_pitches_2 %>%
filter(ID == "Great") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.06192874
model_results_pitches_2 %>%
filter(ID == "Decent") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.01215481
model_results_pitches_2 %>%
filter(ID == "Bad") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.03652784
# RMSE
model_results_pitches_2 %>%
filter(ID == "Great") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.1927567
model_results_pitches_2 %>%
filter(ID == "Decent") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.2850569
model_results_pitches_2 %>%
filter(ID == "Bad") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.2510364
pitchers %>%
# filter(pitch_type == "SL") %>%
ggplot(aes(x = plate_z)) +
geom_histogram(binwidth = 0.15, color = "white")
pitchers %>%
ggplot(aes(x = delta_run_exp)) +
geom_histogram(binwidth = 0.15, color = "white")
pitchers %>%
filter(pitch_type %in% c("FF", "SL", "CH")) %>%
mutate(pitch_dist = sqrt(plate_x^2 + (2.5 - plate_z)^2)) %>%
ggplot(aes(x = pitch_dist, color = ID)) +
geom_density() +
facet_wrap(~ pitch_type,
ncol = 1)
pitchers %>%
filter(pitch_type %in% c("FF", "SL", "CH")) %>%
mutate(pitch_dist = sqrt(plate_x^2 + (2.5 - plate_z)^2)) %>%
ggplot(aes(x = pitch_dist, y = delta_run_exp)) +
geom_point(alpha = 0.15)
# Model?
model <- lm(delta_run_exp ~ dist + speed_change + break_change,
data = pitchers)
preds <- pitchers %>%
mutate(predicted = predict(model, pitchers)) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
acc <- preds %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point(alpha = 0.5) +
geom_smooth()
acc
acc +
facet_wrap(~ pitch_type)
acc +
facet_wrap(~ ID)
data_ff <- pitchers %>%
filter(pitch_type == "FF",
!is.na(break_change)) %>%
filter(pitch_type != "PO")
model_ff <- train(
delta_run_exp ~ dist + speed_change + break_change + release_speed + pfx_x + pfx_z,
data = data_ff,
method = "ranger",
trControl = trainControl(method = "cv", number = 5))
preds_ff <- cbind(data_ff, predict(model_ff)) %>%
as.data.frame() %>%
rename(observed = delta_run_exp,
predicted = "predict(model_ff)") %>%
select(ID, zone, pitch_type, observed, predicted)
preds_ff %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
geom_abline(slope = 1, intercept = 0) +
coord_fixed() +
labs(title = "Fastball RF Model",
caption = paste0("RMSE: ", round(rmse(preds_ff$observed, preds_ff$predicted), 4)))
preds_ff %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9625188
data_ff <- pitchers %>%
filter(pitch_type == "FF",
!is.na(break_change))
model_ff <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_ff, mtry = 2)
preds_ff <- data_ff %>%
mutate(predicted = predict(model_ff, data_ff)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_ff %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Fastball RF Model",
caption = paste0("RMSE: ", round(rmse(preds_ff$observed, preds_ff$predicted), 4)))
preds_ff %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9710876
data_si <- pitchers %>%
filter(pitch_type == "SI",
!is.na(break_change))
model_si <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_si, mtry = 2)
preds_si <- data_si %>%
mutate(predicted = predict(model_si, data_si)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_si %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Sinker RF Model",
caption = paste0("RMSE: ", round(rmse(preds_si$observed, preds_si$predicted), 4)))
preds_si %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9591401
data_ch <- pitchers %>%
filter(pitch_type == "CH",
!is.na(break_change))
model_ch <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_ch, mtry = 2)
preds_ch <- data_ch %>%
mutate(predicted = predict(model_ch, data_ch)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_ch %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Change-Up RF Model",
caption = paste0("RMSE: ", round(rmse(preds_ch$observed, preds_ch$predicted), 4)))
preds_ch %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9661838
data_sl <- pitchers %>%
filter(pitch_type == "SL",
!is.na(break_change))
model_sl <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_sl, mtry = 2)
preds_sl <- data_sl %>%
mutate(predicted = predict(model_sl, data_sl)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_sl %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Slider RF Model",
caption = paste0("RMSE: ", round(rmse(preds_sl$observed, preds_sl$predicted), 4)))
preds_sl %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9677164
#
# Actmodel <- train(delta_run_exp ~ dist + speed_change + break_change,
# data = data_sl, method = "ranger",
# trControl = trainControl(method = "cv", number = 10, verboseIter = TRUE), preProcess = c("knnImpute"))
# plot(Actmodel$finalModel$forest)
# Slider Logistic Model
whiff_sl <- whiff %>%
filter(pitch_type == "SL") %>%
mutate(whiff = str_replace(whiff, "TRUE", "1"),
whiff = str_replace(whiff, "FALSE", "0"),
whiff = as.numeric(whiff))
# Original Model
model1 <- glm(whiff ~ release_speed + spin_axis + pfx_x + pfx_z + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + dist,
data = whiff_sl, family = binomial)
# Reduced Model
model1 <- glm(whiff ~ release_speed + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + dist,
data = whiff_sl, family = binomial)
summary(model1)
##
## Call:
## glm(formula = whiff ~ release_speed + plate_x + plate_z + release_spin_rate +
## speed_change + break_change + pfx_total + dist, family = binomial,
## data = whiff_sl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.0550541 3.0643314 2.955 0.00313 **
## release_speed -0.0924999 0.0353807 -2.614 0.00894 **
## plate_x 0.5454560 0.0870095 6.269 0.000000000364 ***
## plate_z -0.2438012 0.0988763 -2.466 0.01367 *
## release_spin_rate -0.0006138 0.0002186 -2.808 0.00498 **
## speed_change 0.1267177 0.0611358 2.073 0.03820 *
## break_change 1.0446186 0.3722565 2.806 0.00501 **
## pfx_total -0.9888782 0.2453062 -4.031 0.000055493095 ***
## dist -0.5004963 0.1216086 -4.116 0.000038612157 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3093.3 on 3604 degrees of freedom
## Residual deviance: 3003.3 on 3596 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 3021.3
##
## Number of Fisher Scoring iterations: 5
preds <- whiff_sl %>%
mutate(prediction_log = predict(model1, whiff_sl),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
preds %>%
ggplot(aes(x = as.character(whiff), y = prediction)) +
geom_boxplot() +
geom_jitter(alpha = 0.1, width = 0.1, height = 0)
preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Whiff proportion by predicted whiff value",
subtitle = "Whiff predictions have a 1% bin width")
preds %>%
arrange(desc(prediction)) %>%
head(10)
## # A tibble: 10 × 85
## pitch_type game_date release_speed release_pos_x release_pos_z player_name
## <chr> <date> <dbl> <dbl> <dbl> <chr>
## 1 SL 2022-06-03 85.5 -2.02 5.32 Gray, Josiah
## 2 SL 2022-04-13 83.4 -1.79 5.18 Gray, Josiah
## 3 SL 2022-04-13 84.7 -1.75 5.25 Gray, Josiah
## 4 SL 2022-04-13 83.3 -1.71 5.09 Gray, Josiah
## 5 SL 2022-04-08 84.5 -1.54 5.17 Gray, Josiah
## 6 SL 2022-04-13 84.1 -1.74 5.19 Gray, Josiah
## 7 SL 2022-04-08 83.7 -1.61 5.2 Gray, Josiah
## 8 SL 2022-04-08 86.2 -1.84 5.4 Gray, Josiah
## 9 SL 2022-04-26 85.7 -1.79 5.28 Gray, Josiah
## 10 SL 2022-04-08 85.1 -1.69 5.19 Gray, Josiah
## # ℹ 79 more variables: batter <dbl>, pitcher...8 <dbl>, events <chr>,
## # description <chr>, zone <dbl>, des <chr>, game_type <chr>, stand <chr>,
## # p_throws <chr>, home_team <chr>, away_team <chr>, type <chr>,
## # hit_location <dbl>, bb_type <chr>, balls <dbl>, strikes <dbl>,
## # game_year <dbl>, pfx_x <dbl>, pfx_z <dbl>, plate_x <dbl>, plate_z <dbl>,
## # on_3b <dbl>, on_2b <dbl>, on_1b <dbl>, outs_when_up <dbl>, inning <dbl>,
## # inning_topbot <chr>, hc_x <dbl>, hc_y <dbl>, tfs_deprecated <lgl>, …
whiff %>%
mutate(count = paste0(balls, "-", strikes)) %>%
filter(pitch_type == "SL") %>%
ggplot(aes(y = whiff, x = pfx_z*12)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~count) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome") +
NULL
# Sliders
whiff %>%
filter(pitch_type =="SL") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(stand), rows = vars(whiff)) +
theme_bw()
# Fastballs
whiff %>%
filter(pitch_type =="FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(stand), rows = vars(whiff)) +
theme_bw()
# Change-Ups
whiff %>%
filter(pitch_type =="CH") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(stand), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(stand == "R",
pitch_type == "SL",
prev_pitch %in% c("FF", "CH", "SL", "CU"),
player_name == "Scherzer, Max") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(prev_pitch), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(stand == "R",
pitch_type == "FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(player_name), rows = vars(whiff)) +
theme_bw()
zoned <- whiff %>%
mutate(loc_x = round(plate_x*3, 0),
loc_y = round(plate_z*3, 0))
zoned %>%
filter(pitch_type == "FF",
plate_z > 0 & plate_z < 6,
plate_x > -1.5 & plate_x < 1.5) %>%
summarize(whiff_perc = mean(whiff == "TRUE"),
pitches = n(),
.by = c(loc_x, loc_y, player_name)) %>%
filter(pitches >= 10) %>%
ggplot(aes(x = -loc_x, y = loc_y, fill = whiff_perc)) +
geom_tile() +
scale_fill_gradient(low = "gray", high = "red") +
facet_wrap(~ player_name) +
coord_fixed() +
theme_bw()
# Slider Logistic Model
whiff_sl2 <- whiff_l %>%
filter(pitch_type == "SL") %>%
mutate(whiff = str_replace(whiff, "TRUE", "1"),
whiff = str_replace(whiff, "FALSE", "0"),
whiff = as.numeric(whiff))
# Original Model
model2 <- glm(whiff ~ pitch_speed + spin_axis + pfx_x + pfx_z + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl2, family = binomial)
# Reduced Model
model2 <- glm(whiff ~ pitch_speed + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl2, family = binomial)
summary(model2)
##
## Call:
## glm(formula = whiff ~ pitch_speed + plate_x + plate_z + release_spin_rate +
## speed_change + break_change + pfx_total + distance, family = binomial,
## data = whiff_sl2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.0425752 0.9277708 -5.435 0.00000005474968265 ***
## pitch_speed 0.0501462 0.0103594 4.841 0.00000129426814286 ***
## plate_x -0.3698816 0.0469998 -7.870 0.00000000000000355 ***
## plate_z -0.4913540 0.0488865 -10.051 < 0.0000000000000002 ***
## release_spin_rate 0.0002894 0.0001245 2.325 0.0201 *
## speed_change 0.0038569 0.0228735 0.169 0.8661
## break_change 0.2291186 0.2001940 1.144 0.2524
## pfx_total -0.1649991 0.1196077 -1.380 0.1677
## distance -0.7165732 0.0642529 -11.152 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11359 on 13558 degrees of freedom
## Residual deviance: 11095 on 13550 degrees of freedom
## (98 observations deleted due to missingness)
## AIC: 11113
##
## Number of Fisher Scoring iterations: 5
preds2 <- whiff_sl2 %>%
mutate(prediction_log = predict(model2, whiff_sl2),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
preds2 %>%
ggplot(aes(x = as.character(whiff), y = prediction)) +
geom_boxplot() +
geom_jitter(alpha = 0.1, width = 0.1, height = 0)
preds2 %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Whiff proportion by predicted whiff value",
subtitle = "Whiff predictions have a 1% bin width")
whiff_l %>%
mutate(count = paste0(balls, "-", strikes)) %>%
filter(pitch_type == "SL") %>%
ggplot(aes(y = whiff, x = pfx_z*12)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~count) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome") +
NULL
# Sliders
whiff_l %>%
filter(pitch_type =="SL") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Fastballs
whiff_l %>%
filter(pitch_type =="FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Change-Ups
whiff_l %>%
filter(pitch_type =="CH") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
whiff_l %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "SL",
prev_pitch %in% c("FF", "CH", "SL", "CU"),
player_name == "Fried, Max") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(prev_pitch), rows = vars(whiff)) +
theme_bw()
whiff_l %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(player_name), rows = vars(whiff)) +
theme_bw()
zoned2 <- whiff_l %>%
mutate(loc_x = round(plate_x*3, 0),
loc_y = round(plate_z*3, 0))
zoned2 %>%
filter(pitch_type == "FF",
plate_z > 0 & plate_z < 6,
plate_x > -1.5 & plate_x < 1.5) %>%
summarize(whiff_perc = mean(whiff == "TRUE"),
pitches = n(),
.by = c(loc_x, loc_y, player_name)) %>%
filter(pitches >= 10) %>%
ggplot(aes(x = -loc_x, y = loc_y, fill = whiff_perc)) +
geom_tile() +
scale_fill_gradient(low = "gray", high = "red") +
facet_wrap(~ player_name) +
coord_fixed() +
theme_bw()
all2 <- all %>%
filter(!is.na(pfx_z)) %>%
mutate(distance_sweet = sqrt(((plate_x - 0.85)^2)+((plate_z - 1.55)^2))) %>%
mutate(zone = case_when(distance < 2 ~ zone,
distance >= 2 & plate_z > (sz_top + sz_bot)/2 ~ 16,
distance >= 2 & plate_z <= (sz_top + sz_bot)/2 ~ 17)) %>%
mutate(ab_id = paste0(game_date, "_", player_id, "_", at_bat_number),
prev_ab_id = lead(ab_id, 1)) %>%
mutate(prev_pitch = ifelse(ab_id == prev_ab_id, lead(pitch_type, 1), NA)) %>%
select(-ab_id, -prev_ab_id)
all_ff <- all2 %>%
filter(pitch_type == "FF") %>%
summarize(fb_z = mean(pfx_z),
fb_x = mean(pfx_x),
fb_mph = mean(pitch_speed),
.by = c(game_date, player_id))
all2 <- all2 %>%
left_join(all_ff, by = c("game_date" = "game_date",
"player_id" = "player_id")) %>%
mutate(pfx_x_diff = (fb_x - pfx_x),
pfx_z_diff = (fb_z = pfx_z),
speed_ff_diff = (fb_mph - pitch_speed))
sliders <- all2 %>%
filter(p_throws == "R",
hitter == "R") %>%
filter(pitch_type == "SL") %>%
mutate(count = paste0(balls, "_", strikes),
prev_pitch = as.factor(prev_pitch)) %>%
mutate(prev_pitch_ff = ifelse(prev_pitch == "FF", 1, 0))
fastballs <- all2 %>%
filter(p_throws == "R",
hitter == "R") %>%
filter(pitch_type == "FF") %>%
mutate(count = paste0(balls, "_", strikes),
prev_pitch = as.factor(prev_pitch))
# Initial Model
sl_model_all <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
balls + strikes + pfx_total + speed_change + break_change + distance +
prev_pitch_ff + pfx_x_diff + pfx_z_diff + speed_ff_diff,
data = sliders, family = binomial)
# Model Evaluation
# summary(sl_model_all)
sl_model <- glm(whiff ~ pfx_x + pfx_z + plate_x + plate_z + zone +
balls + pfx_total + speed_change + break_change + distance +
prev_pitch_ff + pfx_x_diff + speed_ff_diff,
data = sliders, family = binomial)
# Model Predictions
sl_preds <- sliders %>%
mutate(prediction_log = predict(sl_model, sliders),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
sl_preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(n = n(), whiff_rate = mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = whiff_rate)) +
geom_point(aes(size = n)) + # size of bin shown on graph
coord_fixed() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Slider Whiff Model Prediction",
subtitle = "Whiff proportion by predicted whiff value",
caption = "Whiff predictions have a 1% bin width")
sl0 + sl1 + sl2
# Initial Model
ff_model_all <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
balls + pfx_total + speed_change + break_change + distance +
pfx_x_diff + pfx_z_diff + speed_ff_diff,
data = fastballs, family = binomial)
# Model Evaluation
# summary(ff_model_all)
ff_model <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_z + zone +
pfx_total + speed_change + break_change + distance +
pfx_x_diff,
data = fastballs, family = binomial)
summary(ff_model)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_z +
## zone + pfx_total + speed_change + break_change + distance +
## pfx_x_diff, family = binomial, data = fastballs)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.656244 0.896061 -11.892 < 0.0000000000000002 ***
## pitch_speed 0.076168 0.009312 8.180 0.000000000000000285 ***
## pfx_x -0.605126 0.163263 -3.706 0.000210 ***
## pfx_z 1.427415 0.394350 3.620 0.000295 ***
## plate_z 1.148325 0.046268 24.819 < 0.0000000000000002 ***
## zone -0.011277 0.006688 -1.686 0.091785 .
## pfx_total -2.056400 0.447796 -4.592 0.000004384582739110 ***
## speed_change 0.159605 0.022717 7.026 0.000000000002127137 ***
## break_change 2.673909 0.209535 12.761 < 0.0000000000000002 ***
## distance -1.364636 0.077490 -17.610 < 0.0000000000000002 ***
## pfx_x_diff -2.145582 0.162400 -13.212 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 20320 on 31922 degrees of freedom
## Residual deviance: 18490 on 31912 degrees of freedom
## (33 observations deleted due to missingness)
## AIC: 18512
##
## Number of Fisher Scoring iterations: 6
# Model Predictions
ff_preds <- fastballs %>%
mutate(prediction_log = predict(ff_model, fastballs),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ff_preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
coord_fixed() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Fastball Whiff Model Prediction",
subtitle = "Whiff proportion by predicted whiff value",
caption = "Whiff predictions have a 1% bin width")
ff0 + ff1 + ff2
## Sliders
# Model
sl_model_s <- glm(whiff ~ pfx_x + pfx_z + plate_x + plate_z + zone +
balls + pfx_total + speed_change + break_change + distance + distance_sweet,
data = sliders, family = binomial)
summary(sl_model_s)
##
## Call:
## glm(formula = whiff ~ pfx_x + pfx_z + plate_x + plate_z + zone +
## balls + pfx_total + speed_change + break_change + distance +
## distance_sweet, family = binomial, data = sliders)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.87727 0.14741 -12.735 < 0.0000000000000002 ***
## pfx_x 0.48620 0.12644 3.845 0.000120 ***
## pfx_z 0.30552 0.08066 3.788 0.000152 ***
## plate_x -0.70390 0.05572 -12.634 < 0.0000000000000002 ***
## plate_z 0.58543 0.05484 10.675 < 0.0000000000000002 ***
## zone 0.01818 0.01030 1.765 0.077553 .
## balls 0.08350 0.02181 3.828 0.000129 ***
## pfx_total -0.74519 0.14550 -5.122 0.000000303 ***
## speed_change 0.04244 0.01909 2.224 0.026180 *
## break_change 0.51312 0.14775 3.473 0.000515 ***
## distance 0.86608 0.09943 8.710 < 0.0000000000000002 ***
## distance_sweet -1.76648 0.06708 -26.334 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19154 on 22105 degrees of freedom
## Residual deviance: 17771 on 22094 degrees of freedom
## AIC: 17795
##
## Number of Fisher Scoring iterations: 5
# Model Predictions
sl_preds_s <- sliders %>%
mutate(prediction_log = predict(sl_model_s, sliders),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
sl_preds_s %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
coord_fixed() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Slider Whiff Model Prediction (w/ Sweet)")
## Fastball
# Model
ff_model_s <- glm(whiff ~ pitch_speed + pfx_z + plate_x + plate_z + zone +
strikes + pfx_total + speed_change + break_change + distance,
data = fastballs, family = binomial)
summary(ff_model_s)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_z + plate_x + plate_z +
## zone + strikes + pfx_total + speed_change + break_change +
## distance, family = binomial, data = fastballs)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.016000 0.882106 -13.622 < 0.0000000000000002 ***
## pitch_speed 0.086479 0.009195 9.405 < 0.0000000000000002 ***
## pfx_z 1.258882 0.171639 7.334 0.000000000000223 ***
## plate_x 0.069898 0.036546 1.913 0.055799 .
## plate_z 1.153443 0.046862 24.614 < 0.0000000000000002 ***
## zone -0.012318 0.006756 -1.823 0.068260 .
## strikes 0.163423 0.024729 6.608 0.000000000038824 ***
## pfx_total -1.503226 0.181106 -8.300 < 0.0000000000000002 ***
## speed_change 0.084469 0.023739 3.558 0.000373 ***
## break_change 1.597067 0.188845 8.457 < 0.0000000000000002 ***
## distance -1.370889 0.077209 -17.756 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 20320 on 31922 degrees of freedom
## Residual deviance: 18620 on 31912 degrees of freedom
## (33 observations deleted due to missingness)
## AIC: 18642
##
## Number of Fisher Scoring iterations: 6
# Model Predictions
ff_preds_s <- fastballs %>%
mutate(prediction_log = predict(ff_model_s, fastballs),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ff_preds_s %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
coord_fixed() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Fastball Whiff Model Prediction (w/ Sweet)")
rhp <- RHP %>%
filter(!is.na(pfx_z)) %>%
mutate(distance_sweet = sqrt(((plate_x - 0.85)^2)+((plate_z - 1.55)^2))) %>%
mutate(zone = case_when(distance < 2 ~ zone,
distance >= 2 & plate_z > (sz_top + sz_bot)/2 ~ 16,
distance >= 2 & plate_z <= (sz_top + sz_bot)/2 ~ 17)) %>%
mutate(ab_id = paste0(game_date, "_", player_id, "_", at_bat_number),
prev_ab_id = lead(ab_id, 1)) %>%
mutate(prev_pitch = ifelse(ab_id == prev_ab_id, lead(pitch_type, 1), NA)) %>%
select(-ab_id, -prev_ab_id) %>%
mutate(prev_pitch = case_when(prev_pitch %in% c("FF", "FC", "SI") ~ "Fastball",
prev_pitch %in% c("SL", "CU", "KC",
"SV", "ST", "CS") ~ "Breaking Ball",
prev_pitch %in% c("CH", "FS") ~ "Off Speed")) %>%
mutate(zone = relevel(as.factor(zone), ref = 5))
rhp <- rhp %>%
mutate(prev_pitch = ifelse(is.na(prev_pitch), "None", prev_pitch)) %>%
mutate(Count = paste0(balls, "-", strikes)) %>%
mutate(Count = as.factor(Count))
sliders2 <- rhp %>%
filter(hitter == "R") %>%
filter(pitch_type == "SL") %>%
mutate(count = paste0(balls, "_", strikes),
prev_pitch = as.factor(prev_pitch)) %>%
mutate(prev_pitch_ff = ifelse(prev_pitch == "FF", 1, 0))
fastballs2 <- rhp %>%
filter(hitter == "R") %>%
filter(pitch_type == fb_type) %>%
mutate(count = paste0(balls, "_", strikes),
prev_pitch = as.factor(prev_pitch))
rhp <- rhp %>%
mutate(is_barrel = is.barrel(LA = launch_angle, EV = launch_speed)) %>%
mutate(is_strike = ifelse(description %in% c("called_strike", "swinging_strike",
"foul_tip", "bunt_foul_tip", "foul_bunt",
"swinging_strike_blocked", "missed_bunt"),
1, 0))
# Batter Whiff
batter_stats <- rhp %>%
summarize(rate = mean(whiff),
pitches = n(),
.by = c(batter, pitch_type)) %>%
mutate(pitch_whiff = weighted.mean(rate, pitches),
.by = pitch_type) %>%
mutate(pred_bwhiff = (pitches / 300)*rate + ((300-pitches)/300)*pitch_whiff) %>%
mutate(pred_bwhiff = ifelse(pitches >= 300, rate, pred_bwhiff))
# Merging Whiff Prediction with RHP
rhp <- rhp %>%
left_join(select(batter_stats, batter, pitch_type, pred_bwhiff),
by = c("batter" = "batter", "pitch_type" = "pitch_type"))
rhp <- rhp %>%
mutate(prev_pitch = ifelse(is.na(prev_pitch), "None", prev_pitch)) %>%
mutate(Count = paste0(balls, "-", strikes)) %>%
mutate(Count = as.factor(Count))
# Barrel
batter_stats2 <- rhp %>%
summarize(rate = mean(is_barrel),
pitches = n(),
.by = c(batter, pitch_type)) %>%
mutate(pitch_barrel = weighted.mean(rate, pitches),
.by = pitch_type) %>%
mutate(pred_bbarrel = (pitches / 300)*rate + ((300-pitches)/300)*pitch_barrel) %>%
mutate(pred_bbarrel = ifelse(pitches >= 300, rate, pred_bbarrel))
rhp <- rhp %>%
left_join(select(batter_stats2, batter, pitch_type, pred_bbarrel),
by = c("batter" = "batter", "pitch_type" = "pitch_type"))
sliders3 <- rhp %>%
filter(hitter == "R") %>%
filter(pitch_type == "SL") %>%
mutate(count = paste0(balls, "_", strikes),
prev_pitch = as.factor(prev_pitch)) %>%
mutate(prev_pitch_ff = ifelse(prev_pitch == "FF", 1, 0))
fastballs3 <- rhp %>%
filter(hitter == "R") %>%
filter(pitch_type == fb_type) %>%
mutate(count = paste0(balls, "_", strikes),
prev_pitch = as.factor(prev_pitch))
# Initial Model
sl_model_whiff_all <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + prev_pitch +
speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff + release_spin_rate +
I(plate_x^2) + I(plate_z^2) + pred_bwhiff,
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_whiff_all)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + speed_change + break_change +
## distance + prev_pitch + speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff +
## release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bwhiff,
## family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.05572579 130.94356037 -0.008 0.993567
## pitch_speed 0.02429046 0.01003510 2.421 0.015497 *
## pfx_x 0.32528485 0.14288652 2.277 0.022815 *
## pfx_z 0.39834309 0.09595890 4.151 0.000033075878 ***
## plate_x 0.80748233 0.12765800 6.325 0.000000000253 ***
## plate_z 2.55594132 0.28344485 9.017 < 0.0000000000000002 ***
## zone1 0.67646339 0.26178958 2.584 0.009766 **
## zone2 0.67667552 0.18528884 3.652 0.000260 ***
## zone3 0.02289412 0.20965633 0.109 0.913045
## zone4 -0.58079680 0.27072960 -2.145 0.031929 *
## zone6 -0.17432469 0.14483353 -1.204 0.228736
## zone7 -0.04511564 0.24343685 -0.185 0.852972
## zone8 0.22867951 0.14490712 1.578 0.114540
## zone9 0.29923544 0.14898638 2.008 0.044593 *
## zone11 1.21977565 0.30442252 4.007 0.000061533691 ***
## zone12 -0.18944390 0.21032547 -0.901 0.367738
## zone13 0.49500499 0.21535459 2.299 0.021530 *
## zone14 0.52889407 0.17237051 3.068 0.002152 **
## zone16 -0.18404646 1.05480688 -0.174 0.861485
## zone17 -0.32604219 0.21788223 -1.496 0.134546
## count0_1 -9.89952634 130.93984983 -0.076 0.939734
## count0_2 -10.27089297 130.93985964 -0.078 0.937478
## count1_0 -9.99869531 130.93982429 -0.076 0.939132
## count1_1 -9.83202808 130.93985283 -0.075 0.940145
## count1_2 -10.06882229 130.93985405 -0.077 0.938706
## count2_0 -9.78812708 130.93991506 -0.075 0.940411
## count2_1 -9.63211779 130.93987042 -0.074 0.941359
## count2_2 -10.05250924 130.93985880 -0.077 0.938805
## count3_0 -10.91935396 130.94192293 -0.083 0.933541
## count3_1 -9.87521223 130.93999450 -0.075 0.939882
## count3_2 -9.95773351 130.93988071 -0.076 0.939381
## pfx_total -0.64169840 0.15105423 -4.248 0.000021555968 ***
## speed_change 0.06878315 0.02198705 3.128 0.001758 **
## break_change 0.48163126 0.15541588 3.099 0.001942 **
## distance 0.99372528 0.17076256 5.819 0.000000005908 ***
## prev_pitchFastball -0.16241984 0.04528214 -3.587 0.000335 ***
## prev_pitchNone -10.32088648 130.93983515 -0.079 0.937175
## prev_pitchOff Speed -0.02126857 0.09929736 -0.214 0.830398
## speed_fb_diff -0.07392525 0.01312000 -5.635 0.000000017552 ***
## pfx_x_fb_diff -0.03049100 0.06537696 -0.466 0.640938
## pfx_z_fb_diff 0.04868005 0.06507598 0.748 0.454430
## release_spin_rate 0.00034439 0.00009848 3.497 0.000470 ***
## I(plate_x^2) -0.89386802 0.07865626 -11.364 < 0.0000000000000002 ***
## I(plate_z^2) -0.63710529 0.06366125 -10.008 < 0.0000000000000002 ***
## pred_bwhiff 18.01795901 0.94440432 19.079 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19130 on 22074 degrees of freedom
## Residual deviance: 17082 on 22030 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 17172
##
## Number of Fisher Scoring iterations: 10
# Refined Model
sl_model_whiff <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
pfx_total + speed_change + break_change + distance + prev_pitch +
speed_fb_diff + release_spin_rate +
I(plate_z^2) + pred_bwhiff,
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_whiff)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + pfx_total + speed_change + break_change +
## distance + prev_pitch + speed_fb_diff + release_spin_rate +
## I(plate_z^2) + pred_bwhiff, family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.49113461 0.91541155 -9.276 < 0.0000000000000002 ***
## pitch_speed 0.02323610 0.00959649 2.421 0.015465 *
## pfx_x 0.31047257 0.13219128 2.349 0.018841 *
## pfx_z 0.45779612 0.08749780 5.232 0.00000016761 ***
## plate_x -0.14336419 0.08553803 -1.676 0.093733 .
## plate_z 0.74628734 0.20871951 3.576 0.000349 ***
## zone1 0.54083270 0.25643055 2.109 0.034938 *
## zone2 1.03289659 0.18080609 5.713 0.00000001112 ***
## zone3 0.82164815 0.19804066 4.149 0.00003340968 ***
## zone4 -0.92160171 0.26646902 -3.459 0.000543 ***
## zone6 0.45119119 0.13412429 3.364 0.000768 ***
## zone7 -0.18593965 0.23907119 -0.778 0.436711
## zone8 0.59609886 0.13993617 4.260 0.00002046179 ***
## zone9 1.10379034 0.13131235 8.406 < 0.0000000000000002 ***
## zone11 0.82240726 0.29744016 2.765 0.005693 **
## zone12 0.79658491 0.19337052 4.119 0.00003797378 ***
## zone13 0.58993031 0.21505990 2.743 0.006086 **
## zone14 1.47733794 0.15153304 9.749 < 0.0000000000000002 ***
## zone16 -0.30376615 1.05330192 -0.288 0.773045
## zone17 0.22697632 0.21207607 1.070 0.284503
## pfx_total -0.65308669 0.14905904 -4.381 0.00001179213 ***
## speed_change 0.05361032 0.02136886 2.509 0.012114 *
## break_change 0.47050907 0.15243457 3.087 0.002024 **
## distance -0.19574290 0.13277515 -1.474 0.140416
## prev_pitchFastball -0.14076785 0.04493543 -3.133 0.001732 **
## prev_pitchNone -0.34439055 0.05348173 -6.439 0.00000000012 ***
## prev_pitchOff Speed -0.00714444 0.09788183 -0.073 0.941814
## speed_fb_diff -0.06463770 0.01186208 -5.449 0.00000005062 ***
## release_spin_rate 0.00037130 0.00009683 3.834 0.000126 ***
## I(plate_z^2) -0.28655748 0.05043700 -5.681 0.00000001335 ***
## pred_bwhiff 18.65870926 0.93484343 19.959 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19130 on 22074 degrees of freedom
## Residual deviance: 17288 on 22044 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 17350
##
## Number of Fisher Scoring iterations: 7
# Model Predictions
sl_preds_whiff <- sliders3 %>%
mutate(prediction_log = predict(sl_model_whiff, sliders3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ggplot(sl_preds_whiff, aes(x = prediction, y = as.numeric(whiff))) +
geom_point(alpha = 0.01) +
geom_smooth() +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_bw()
# Initial Model
fb_model_whiff_all <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + prev_pitch +
release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bwhiff,
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_whiff_all)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + speed_change + break_change +
## distance + prev_pitch + release_spin_rate + I(plate_x^2) +
## I(plate_z^2) + pred_bwhiff, family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.5816274 119.4715727 -0.063 0.94940
## pitch_speed 0.0557803 0.0078739 7.084 0.000000000001398783 ***
## pfx_x 0.6463242 0.0808860 7.991 0.000000000000001343 ***
## pfx_z -1.1705641 0.1174326 -9.968 < 0.0000000000000002 ***
## plate_x -0.0565644 0.0638425 -0.886 0.37562
## plate_z 3.0664785 0.3787614 8.096 0.000000000000000568 ***
## zone1 0.0957271 0.1201531 0.797 0.42562
## zone2 0.1247986 0.0970758 1.286 0.19859
## zone3 0.2886408 0.1126223 2.563 0.01038 *
## zone4 -0.4660176 0.1183909 -3.936 0.000082760092236976 ***
## zone6 0.1620347 0.0943015 1.718 0.08575 .
## zone7 0.1196145 0.1593547 0.751 0.45288
## zone8 -0.2063144 0.1310306 -1.575 0.11536
## zone9 -0.2041031 0.1431304 -1.426 0.15387
## zone11 0.2641286 0.1442519 1.831 0.06710 .
## zone12 0.0254171 0.1395724 0.182 0.85550
## zone13 0.9422216 0.1658940 5.680 0.000000013496167951 ***
## zone14 0.3223702 0.1583034 2.036 0.04171 *
## zone16 -1.8001997 0.3403438 -5.289 0.000000122747212274 ***
## zone17 1.1591083 0.4145653 2.796 0.00517 **
## count0_1 -6.6130700 119.4680996 -0.055 0.95586
## count0_2 -6.8621109 119.4681077 -0.057 0.95420
## count1_0 -6.7844000 119.4680778 -0.057 0.95471
## count1_1 -6.5961524 119.4681027 -0.055 0.95597
## count1_2 -6.6379474 119.4680981 -0.056 0.95569
## count2_0 -6.8260742 119.4681281 -0.057 0.95444
## count2_1 -6.5567532 119.4681139 -0.055 0.95623
## count2_2 -6.6337006 119.4680999 -0.056 0.95572
## count3_0 -8.6812066 119.4685239 -0.073 0.94207
## count3_1 -6.8714283 119.4681450 -0.058 0.95413
## count3_2 -6.8483351 119.4681093 -0.057 0.95429
## pfx_total 0.8451315 0.1825259 4.630 0.000003653101190601 ***
## speed_change 0.0909285 0.0218399 4.163 0.000031353786683178 ***
## break_change 1.5237499 0.1692337 9.004 < 0.0000000000000002 ***
## distance 0.2734813 0.1723536 1.587 0.11257
## prev_pitchFastball 0.0614724 0.0437354 1.406 0.15986
## prev_pitchNone -7.0729502 119.4680892 -0.059 0.95279
## prev_pitchOff Speed 0.0356306 0.0847117 0.421 0.67404
## release_spin_rate 0.0004958 0.0001154 4.295 0.000017442646480823 ***
## I(plate_x^2) -0.7993284 0.0797653 -10.021 < 0.0000000000000002 ***
## I(plate_z^2) -0.4638120 0.0712547 -6.509 0.000000000075547577 ***
## pred_bwhiff 17.4920563 0.6896532 25.364 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25315 on 42647 degrees of freedom
## Residual deviance: 22519 on 42606 degrees of freedom
## (129 observations deleted due to missingness)
## AIC: 22603
##
## Number of Fisher Scoring iterations: 9
# Refined Model
fb_model_whiff <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
pfx_total + speed_change + break_change + distance + prev_pitch +
release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bwhiff,
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_whiff)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + pfx_total + speed_change + break_change +
## distance + prev_pitch + release_spin_rate + I(plate_x^2) +
## I(plate_z^2) + pred_bwhiff, family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.2154028 0.9000199 -15.795 < 0.0000000000000002 ***
## pitch_speed 0.0550092 0.0078550 7.003 0.000000000002503755 ***
## pfx_x 0.6338384 0.0806196 7.862 0.000000000000003778 ***
## pfx_z -1.1561825 0.1172466 -9.861 < 0.0000000000000002 ***
## plate_x -0.0615590 0.0636244 -0.968 0.33328
## plate_z 3.0392831 0.3777182 8.046 0.000000000000000852 ***
## zone1 0.0929298 0.1199153 0.775 0.43836
## zone2 0.1201440 0.0968204 1.241 0.21464
## zone3 0.2975429 0.1123314 2.649 0.00808 **
## zone4 -0.4653376 0.1182299 -3.936 0.000082895313099199 ***
## zone6 0.1659789 0.0940813 1.764 0.07770 .
## zone7 0.1231180 0.1591132 0.774 0.43906
## zone8 -0.2125384 0.1309014 -1.624 0.10445
## zone9 -0.2114364 0.1430251 -1.478 0.13932
## zone11 0.2615133 0.1440152 1.816 0.06939 .
## zone12 0.0279241 0.1394753 0.200 0.84132
## zone13 0.9459586 0.1657187 5.708 0.000000011416409673 ***
## zone14 0.3223060 0.1581047 2.039 0.04149 *
## zone16 -1.8128981 0.3402077 -5.329 0.000000098864591113 ***
## zone17 1.1355316 0.4142803 2.741 0.00613 **
## pfx_total 0.8280238 0.1821148 4.547 0.000005449044810380 ***
## speed_change 0.1002090 0.0209493 4.783 0.000001723549942532 ***
## break_change 1.5058941 0.1686232 8.931 < 0.0000000000000002 ***
## distance 0.2734463 0.1719881 1.590 0.11185
## prev_pitchFastball 0.0332318 0.0433347 0.767 0.44316
## prev_pitchNone -0.3721674 0.0522667 -7.121 0.000000000001075026 ***
## prev_pitchOff Speed 0.0288480 0.0841504 0.343 0.73174
## release_spin_rate 0.0005103 0.0001152 4.431 0.000009365819796711 ***
## I(plate_x^2) -0.7876100 0.0794838 -9.909 < 0.0000000000000002 ***
## I(plate_z^2) -0.4586255 0.0710512 -6.455 0.000000000108317436 ***
## pred_bwhiff 17.5208378 0.6871512 25.498 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25315 on 42647 degrees of freedom
## Residual deviance: 22622 on 42617 degrees of freedom
## (129 observations deleted due to missingness)
## AIC: 22684
##
## Number of Fisher Scoring iterations: 7
# Model Predictions
fb_preds_whiff <- fastballs3 %>%
mutate(prediction_log = predict(fb_model_whiff, fastballs3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ggplot(fb_preds_whiff, aes(x = prediction, y = as.numeric(whiff))) +
geom_point(alpha = 0.01) +
geom_smooth() +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_bw()
# Initial Model
sl_model_barrel_all <- glm(is_barrel ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + prev_pitch +
speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff + release_spin_rate +
I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_barrel_all)
##
## Call:
## glm(formula = is_barrel ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + speed_change + break_change +
## distance + prev_pitch + speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff +
## release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
## family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 7.293875026 18646.259525496 0.000
## pitch_speed -0.081909424 0.031449146 -2.605
## pfx_x -0.453897008 0.456442906 -0.994
## pfx_z -0.362468383 0.305897768 -1.185
## plate_x -0.546581403 0.420562756 -1.300
## plate_z 6.937112624 2.582609967 2.686
## zone1 -0.393990464 0.580685974 -0.678
## zone2 -0.188618258 0.399110399 -0.473
## zone3 0.074022838 0.515658794 0.144
## zone4 0.184051292 0.360808591 0.510
## zone6 0.214614109 0.308348434 0.696
## zone7 0.678258744 0.450161697 1.507
## zone8 -0.162705876 0.331497844 -0.491
## zone9 -0.213937320 0.405685694 -0.527
## zone11 -0.134046455 0.959872134 -0.140
## zone12 -0.018617622 0.887775004 -0.021
## zone13 -1.397251651 1.138631154 -1.227
## zone14 -1.415418799 0.793204765 -1.784
## zone16 -10.692157389 2047.215128740 -0.005
## zone17 -9.660105428 338.209372556 -0.029
## count0_1 -13.211829787 18646.259012691 -0.001
## count0_2 -12.504457471 18646.259013646 -0.001
## count1_0 -13.120863562 18646.259010632 -0.001
## count1_1 -13.207366384 18646.259012947 -0.001
## count1_2 -12.772845644 18646.259012899 -0.001
## count2_0 -13.859592726 18646.259021090 -0.001
## count2_1 -12.564168650 18646.259013105 -0.001
## count2_2 -12.681792595 18646.259013041 -0.001
## count3_0 -30.850483170 19171.044211973 -0.002
## count3_1 -12.707591888 18646.259017700 -0.001
## count3_2 -12.336059230 18646.259013132 -0.001
## pfx_total 0.328569112 0.471870190 0.696
## speed_change -0.146712387 0.073743383 -1.989
## break_change -0.548699599 0.491310983 -1.117
## distance -0.234443556 0.769579412 -0.305
## prev_pitchFastball 0.118490700 0.152430096 0.777
## prev_pitchNone -13.373199243 18646.259011235 -0.001
## prev_pitchOff Speed 0.061249736 0.317536347 0.193
## speed_fb_diff 0.097073177 0.045164905 2.149
## pfx_x_fb_diff -0.041980060 0.222089094 -0.189
## pfx_z_fb_diff -0.027667102 0.223107751 -0.124
## release_spin_rate -0.000007429 0.000340174 -0.022
## I(plate_x^2) -2.073882655 0.703505253 -2.948
## I(plate_z^2) -1.425644038 0.522422545 -2.729
## pred_bbarrel 162.123058716 11.221906112 14.447
## Pr(>|z|)
## (Intercept) 0.99969
## pitch_speed 0.00920 **
## pfx_x 0.32002
## pfx_z 0.23604
## plate_x 0.19372
## plate_z 0.00723 **
## zone1 0.49746
## zone2 0.63650
## zone3 0.88586
## zone4 0.60998
## zone6 0.48642
## zone7 0.13189
## zone8 0.62355
## zone9 0.59795
## zone11 0.88894
## zone12 0.98327
## zone13 0.21977
## zone14 0.07435 .
## zone16 0.99583
## zone17 0.97721
## count0_1 0.99943
## count0_2 0.99946
## count1_0 0.99944
## count1_1 0.99943
## count1_2 0.99945
## count2_0 0.99941
## count2_1 0.99946
## count2_2 0.99946
## count3_0 0.99872
## count3_1 0.99946
## count3_2 0.99947
## pfx_total 0.48623
## speed_change 0.04665 *
## break_change 0.26408
## distance 0.76064
## prev_pitchFastball 0.43696
## prev_pitchNone 0.99943
## prev_pitchOff Speed 0.84704
## speed_fb_diff 0.03161 *
## pfx_x_fb_diff 0.85007
## pfx_z_fb_diff 0.90131
## release_spin_rate 0.98258
## I(plate_x^2) 0.00320 **
## I(plate_z^2) 0.00635 **
## pred_bbarrel < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2844.2 on 22074 degrees of freedom
## Residual deviance: 2101.2 on 22030 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 2191.2
##
## Number of Fisher Scoring iterations: 20
# Refined Model
sl_model_barrel <- glm(is_barrel ~ pitch_speed + plate_z + zone +
prev_pitch + speed_fb_diff + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_barrel)
##
## Call:
## glm(formula = is_barrel ~ pitch_speed + plate_z + zone + prev_pitch +
## speed_fb_diff + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
## family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.01014 3.32848 -1.806 0.07097 .
## pitch_speed -0.08675 0.02824 -3.072 0.00213 **
## plate_z 7.42214 1.67011 4.444 0.00000883 ***
## zone1 -0.10985 0.49604 -0.221 0.82474
## zone2 -0.23491 0.35924 -0.654 0.51318
## zone3 -0.19214 0.44340 -0.433 0.66478
## zone4 0.48358 0.25840 1.871 0.06129 .
## zone6 -0.07541 0.23326 -0.323 0.74648
## zone7 0.89451 0.33597 2.662 0.00776 **
## zone8 -0.22664 0.27866 -0.813 0.41603
## zone9 -0.50425 0.32297 -1.561 0.11846
## zone11 0.19732 0.86422 0.228 0.81940
## zone12 -0.40796 0.84107 -0.485 0.62765
## zone13 -1.05222 1.10086 -0.956 0.33916
## zone14 -1.78436 0.75086 -2.376 0.01748 *
## zone16 -10.32459 2114.92382 -0.005 0.99610
## zone17 -10.04117 358.95710 -0.028 0.97768
## prev_pitchFastball 0.12237 0.15035 0.814 0.41570
## prev_pitchNone -0.42001 0.17970 -2.337 0.01942 *
## prev_pitchOff Speed 0.12976 0.31370 0.414 0.67913
## speed_fb_diff 0.08374 0.03580 2.339 0.01932 *
## I(plate_x^2) -2.37326 0.49198 -4.824 0.00000141 ***
## I(plate_z^2) -1.52392 0.34552 -4.410 0.00001031 ***
## pred_bbarrel 160.34359 11.04768 14.514 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2844.6 on 22092 degrees of freedom
## Residual deviance: 2135.5 on 22069 degrees of freedom
## (13 observations deleted due to missingness)
## AIC: 2183.5
##
## Number of Fisher Scoring iterations: 20
# Model Predictions
sl_preds_barrel <- sliders3 %>%
mutate(prediction_log = predict(sl_model_barrel, sliders3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ggplot(sl_preds_barrel, aes(x = prediction, y = as.numeric(is_barrel))) +
geom_point(alpha = 0.01) +
geom_smooth() +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_bw()
# Initial Model
fb_model_barrel_all <- glm(is_barrel ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + prev_pitch +
release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_barrel_all)
##
## Call:
## glm(formula = is_barrel ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + speed_change + break_change +
## distance + prev_pitch + release_spin_rate + I(plate_x^2) +
## I(plate_z^2) + pred_bbarrel, family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 14.1690697 17730.3514959 0.001 0.99936
## pitch_speed -0.0908045 0.0176352 -5.149 0.000000262
## pfx_x -0.1107500 0.2061660 -0.537 0.59114
## pfx_z 0.3520933 0.2950100 1.193 0.23268
## plate_x -0.3151407 0.2052985 -1.535 0.12477
## plate_z 5.5502796 1.2987011 4.274 0.000019224
## zone1 -0.1820119 0.2647474 -0.687 0.49177
## zone2 0.0840930 0.1886724 0.446 0.65581
## zone3 0.1178921 0.2595632 0.454 0.64969
## zone4 0.3193995 0.1881786 1.697 0.08964
## zone6 0.1669336 0.1875701 0.890 0.37348
## zone7 0.4791432 0.2904666 1.650 0.09903
## zone8 -0.1615013 0.2334322 -0.692 0.48903
## zone9 -0.3147077 0.3116269 -1.010 0.31255
## zone11 0.0296693 0.3580654 0.083 0.93396
## zone12 0.4714067 0.3472315 1.358 0.17459
## zone13 0.5349522 0.4132797 1.294 0.19552
## zone14 -14.3917040 221.4000680 -0.065 0.94817
## zone16 -11.6632825 424.4048236 -0.027 0.97808
## zone17 -10.1341457 526.2729106 -0.019 0.98464
## count0_1 -16.7659764 17730.3513307 -0.001 0.99925
## count0_2 -16.6629977 17730.3513313 -0.001 0.99925
## count1_0 -16.7757680 17730.3513299 -0.001 0.99925
## count1_1 -16.5670098 17730.3513307 -0.001 0.99925
## count1_2 -16.4600242 17730.3513308 -0.001 0.99926
## count2_0 -16.3770285 17730.3513311 -0.001 0.99926
## count2_1 -16.3456247 17730.3513309 -0.001 0.99926
## count2_2 -16.1580357 17730.3513306 -0.001 0.99927
## count3_0 -16.9047437 17730.3513329 -0.001 0.99924
## count3_1 -16.1803143 17730.3513313 -0.001 0.99927
## count3_2 -16.2369068 17730.3513307 -0.001 0.99927
## pfx_total -0.1464570 0.4351920 -0.337 0.73647
## speed_change -0.0964585 0.0456430 -2.113 0.03457
## break_change -0.9368259 0.3748858 -2.499 0.01246
## distance -0.3962217 0.4272619 -0.927 0.35374
## prev_pitchFastball 0.0625169 0.0952142 0.657 0.51144
## prev_pitchNone -17.0356334 17730.3513303 -0.001 0.99923
## prev_pitchOff Speed -0.1473366 0.1822982 -0.808 0.41897
## release_spin_rate -0.0006799 0.0002429 -2.799 0.00512
## I(plate_x^2) -1.5757072 0.3077263 -5.120 0.000000305
## I(plate_z^2) -1.0701619 0.2482737 -4.310 0.000016295
## pred_bbarrel 109.1558372 5.8158491 18.769 < 0.0000000000000002
##
## (Intercept)
## pitch_speed ***
## pfx_x
## pfx_z
## plate_x
## plate_z ***
## zone1
## zone2
## zone3
## zone4 .
## zone6
## zone7 .
## zone8
## zone9
## zone11
## zone12
## zone13
## zone14
## zone16
## zone17
## count0_1
## count0_2
## count1_0
## count1_1
## count1_2
## count2_0
## count2_1
## count2_2
## count3_0
## count3_1
## count3_2
## pfx_total
## speed_change *
## break_change *
## distance
## prev_pitchFastball
## prev_pitchNone
## prev_pitchOff Speed
## release_spin_rate **
## I(plate_x^2) ***
## I(plate_z^2) ***
## pred_bbarrel ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7337.6 on 42647 degrees of freedom
## Residual deviance: 6177.1 on 42606 degrees of freedom
## (129 observations deleted due to missingness)
## AIC: 6261.1
##
## Number of Fisher Scoring iterations: 19
# Refined Model
fb_model_barrel <- glm(is_barrel ~ pitch_speed + pfx_z + plate_z + zone +
break_change + prev_pitch +
release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_barrel)
##
## Call:
## glm(formula = is_barrel ~ pitch_speed + pfx_z + plate_z + zone +
## break_change + prev_pitch + release_spin_rate + I(plate_x^2) +
## I(plate_z^2) + pred_bbarrel, family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.3077526 1.8946710 -1.746 0.08084 .
## pitch_speed -0.0944753 0.0157784 -5.988 0.000000002129105663 ***
## pfx_z 0.2619646 0.1146813 2.284 0.02235 *
## plate_z 6.6124560 0.8723678 7.580 0.000000000000034584 ***
## zone1 -0.1332453 0.2135455 -0.624 0.53265
## zone2 -0.0091205 0.1572915 -0.058 0.95376
## zone3 -0.1650322 0.2016200 -0.819 0.41305
## zone4 0.4150531 0.1427792 2.907 0.00365 **
## zone6 -0.0599910 0.1406864 -0.426 0.66980
## zone7 0.5368172 0.2333650 2.300 0.02143 *
## zone8 -0.2251755 0.2084197 -1.080 0.27997
## zone9 -0.5748546 0.2686229 -2.140 0.03235 *
## zone11 0.0951644 0.3029716 0.314 0.75344
## zone12 0.1191472 0.2829984 0.421 0.67374
## zone13 0.6782115 0.3473910 1.952 0.05090 .
## zone14 -14.7673307 220.5172265 -0.067 0.94661
## zone16 -11.5851355 427.2020747 -0.027 0.97837
## zone17 -10.2612927 539.2332259 -0.019 0.98482
## break_change -1.0184622 0.3284472 -3.101 0.00193 **
## prev_pitchFastball 0.0412383 0.0941765 0.438 0.66147
## prev_pitchNone -0.5083031 0.1124689 -4.519 0.000006198651259736 ***
## prev_pitchOff Speed -0.0781888 0.1806309 -0.433 0.66511
## release_spin_rate -0.0007461 0.0002314 -3.224 0.00126 **
## I(plate_x^2) -1.7720270 0.2178009 -8.136 0.000000000000000409 ***
## I(plate_z^2) -1.2750311 0.1584656 -8.046 0.000000000000000855 ***
## pred_bbarrel 108.2006073 5.7652900 18.768 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7339.0 on 42689 degrees of freedom
## Residual deviance: 6211.6 on 42664 degrees of freedom
## (87 observations deleted due to missingness)
## AIC: 6263.6
##
## Number of Fisher Scoring iterations: 19
# Model Predictions
fb_preds_barrel <- fastballs3 %>%
mutate(prediction_log = predict(fb_model_barrel, fastballs3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ggplot(fb_preds_barrel, aes(x = prediction, y = as.numeric(is_barrel))) +
geom_point(alpha = 0.01) +
geom_smooth() +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_bw()
# Initial Model
sl_model_strike_all <- glm(is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + prev_pitch +
speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff + release_spin_rate +
I(plate_x^2) + I(plate_z^2),
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_strike_all)
##
## Call:
## glm(formula = is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + speed_change + break_change +
## distance + prev_pitch + speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff +
## release_spin_rate + I(plate_x^2) + I(plate_z^2), family = binomial,
## data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.75024001 79.39850610 0.060 0.95229
## pitch_speed 0.00925384 0.00791292 1.169 0.24222
## pfx_x 0.25808837 0.11130676 2.319 0.02041 *
## pfx_z 0.11982747 0.07441403 1.610 0.10734
## plate_x 0.46897830 0.09065777 5.173 0.00000023028778269 ***
## plate_z 2.45280263 0.22575675 10.865 < 0.0000000000000002 ***
## zone1 1.19664853 0.15411546 7.765 0.00000000000000819 ***
## zone2 0.67612487 0.11719114 5.769 0.00000000795450029 ***
## zone3 0.55793916 0.13179678 4.233 0.00002302569312602 ***
## zone4 0.49378629 0.12135859 4.069 0.00004725174229177 ***
## zone6 -0.03804441 0.09187810 -0.414 0.67882
## zone7 0.47201047 0.14628566 3.227 0.00125 **
## zone8 0.05410273 0.09741951 0.555 0.57865
## zone9 0.12880167 0.10419625 1.236 0.21640
## zone11 -0.48623687 0.21247328 -2.288 0.02211 *
## zone12 -0.45361904 0.14641379 -3.098 0.00195 **
## zone13 -0.48151283 0.16669678 -2.889 0.00387 **
## zone14 -0.16530831 0.12688952 -1.303 0.19265
## zone16 -1.81037578 1.03597979 -1.748 0.08055 .
## zone17 -0.26649423 0.17147450 -1.554 0.12015
## count0_1 -10.43100365 79.39479176 -0.131 0.89547
## count0_2 -10.73412348 79.39480416 -0.135 0.89245
## count1_0 -9.98768532 79.39476602 -0.126 0.89989
## count1_1 -10.32495397 79.39479497 -0.130 0.89653
## count1_2 -10.68438113 79.39479772 -0.135 0.89295
## count2_0 -9.68952636 79.39485026 -0.122 0.90287
## count2_1 -10.22369978 79.39481514 -0.129 0.89754
## count2_2 -10.72622292 79.39480296 -0.135 0.89253
## count3_0 -9.25413212 79.39567788 -0.117 0.90721
## count3_1 -10.08062665 79.39492292 -0.127 0.89897
## count3_2 -10.64873659 79.39482608 -0.134 0.89330
## pfx_total -0.52072766 0.11713484 -4.446 0.00000876712754740 ***
## speed_change 0.03223030 0.01723334 1.870 0.06145 .
## break_change 0.28490650 0.12116882 2.351 0.01871 *
## distance 0.85074453 0.13280061 6.406 0.00000000014921128 ***
## prev_pitchFastball -0.16188360 0.03758216 -4.307 0.00001651408316119 ***
## prev_pitchNone -9.88344393 79.39477706 -0.124 0.90093
## prev_pitchOff Speed -0.03574840 0.08399368 -0.426 0.67039
## speed_fb_diff -0.06257527 0.01042094 -6.005 0.00000000191614724 ***
## pfx_x_fb_diff 0.00500042 0.05148048 0.097 0.92262
## pfx_z_fb_diff 0.03899110 0.05179426 0.753 0.45157
## release_spin_rate 0.00034815 0.00007784 4.473 0.00000772983997455 ***
## I(plate_x^2) -0.90577992 0.06212935 -14.579 < 0.0000000000000002 ***
## I(plate_z^2) -0.55759221 0.04897547 -11.385 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27338 on 22074 degrees of freedom
## Residual deviance: 24690 on 22031 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 24778
##
## Number of Fisher Scoring iterations: 9
# Refined Model
sliders3 <- sliders3 %>%
mutate(fb_prev = ifelse(prev_pitch == "Fastball", 1, 0)) %>%
mutate(count_s = case_when(count %in% c("0_0", "1_1", "2_2") ~ "Even",
count %in% c("1_0", "2_1", "2_0", "3_0", "3_1") ~ "Hitter",
count %in% c("0_1", "0_2", "1_2") ~ "Pitcher",
count == "3_2" ~ "Full"))
sl_model_strike <- glm(is_strike ~ pfx_x + pfx_z + plate_x + plate_z + zone +
count_s + pfx_total + break_change + distance + prev_pitch +
speed_fb_diff + release_spin_rate +
I(plate_x^2) + I(plate_z^2),
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_strike)
##
## Call:
## glm(formula = is_strike ~ pfx_x + pfx_z + plate_x + plate_z +
## zone + count_s + pfx_total + break_change + distance + prev_pitch +
## speed_fb_diff + release_spin_rate + I(plate_x^2) + I(plate_z^2),
## family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.95025380 0.37550196 -13.183 < 0.0000000000000002 ***
## pfx_x 0.23839221 0.10328061 2.308 0.02099 *
## pfx_z 0.18491137 0.06590690 2.806 0.00502 **
## plate_x 0.46386633 0.09033130 5.135 0.0000002818933587 ***
## plate_z 2.45803270 0.22543449 10.904 < 0.0000000000000002 ***
## zone1 1.19268424 0.15378690 7.755 0.0000000000000088 ***
## zone2 0.66944751 0.11694487 5.724 0.0000000103756699 ***
## zone3 0.55258438 0.13153879 4.201 0.0000265828442377 ***
## zone4 0.49157300 0.12118791 4.056 0.0000498588881342 ***
## zone6 -0.04426440 0.09169664 -0.483 0.62929
## zone7 0.47195408 0.14588479 3.235 0.00122 **
## zone8 0.05236311 0.09726019 0.538 0.59031
## zone9 0.13024189 0.10397632 1.253 0.21035
## zone11 -0.45938264 0.21207454 -2.166 0.03030 *
## zone12 -0.44645176 0.14612246 -3.055 0.00225 **
## zone13 -0.48725550 0.16628431 -2.930 0.00339 **
## zone14 -0.16279605 0.12661978 -1.286 0.19855
## zone16 -1.80640227 1.03557477 -1.744 0.08110 .
## zone17 -0.26112200 0.17099826 -1.527 0.12675
## count_sFull -0.15783734 0.09108560 -1.733 0.08312 .
## count_sHitter 0.46310520 0.05118051 9.048 < 0.0000000000000002 ***
## count_sPitcher -0.09325065 0.04506279 -2.069 0.03851 *
## pfx_total -0.51880950 0.11441163 -4.535 0.0000057716281996 ***
## break_change 0.28892243 0.11733979 2.462 0.01381 *
## distance 0.83850669 0.13257275 6.325 0.0000000002534302 ***
## prev_pitchFastball -0.14942047 0.03734628 -4.001 0.0000630896086984 ***
## prev_pitchNone 0.59426251 0.05044210 11.781 < 0.0000000000000002 ***
## prev_pitchOff Speed -0.07509416 0.08330675 -0.901 0.36737
## speed_fb_diff -0.05304525 0.00805446 -6.586 0.0000000000452374 ***
## release_spin_rate 0.00037446 0.00007346 5.097 0.0000003445692910 ***
## I(plate_x^2) -0.90294934 0.06193532 -14.579 < 0.0000000000000002 ***
## I(plate_z^2) -0.56083909 0.04890608 -11.468 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27338 on 22074 degrees of freedom
## Residual deviance: 24773 on 22043 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 24837
##
## Number of Fisher Scoring iterations: 7
# Model Predictions
sl_preds_strike <- sliders3 %>%
mutate(prediction_log = predict(sl_model_strike, sliders3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ggplot(sl_preds_strike, aes(x = prediction, y = as.numeric(is_strike))) +
geom_point(alpha = 0.01) +
geom_smooth() +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_bw()
# Initial Model
fb_model_strike_all <- glm(is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + prev_pitch +
release_spin_rate + I(plate_x^2) + I(plate_z^2),
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_strike_all)
##
## Call:
## glm(formula = is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + speed_change + break_change +
## distance + prev_pitch + release_spin_rate + I(plate_x^2) +
## I(plate_z^2), family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.24780405 72.46554118 -0.003 0.997272
## pitch_speed 0.02129738 0.00528230 4.032 0.00005534201621050 ***
## pfx_x 0.18963848 0.05675477 3.341 0.000834 ***
## pfx_z -0.31760698 0.07655709 -4.149 0.00003344721864518 ***
## plate_x 0.37177007 0.04778935 7.779 0.00000000000000729 ***
## plate_z 5.08173312 0.27502030 18.478 < 0.0000000000000002 ***
## zone1 0.12145573 0.08266136 1.469 0.141747
## zone2 -0.02041481 0.06564328 -0.311 0.755804
## zone3 0.43666709 0.07582591 5.759 0.00000000847085094 ***
## zone4 -0.07727690 0.06987081 -1.106 0.268728
## zone6 0.65337265 0.06014767 10.863 < 0.0000000000000002 ***
## zone7 0.94310736 0.08985345 10.496 < 0.0000000000000002 ***
## zone8 0.70740731 0.07008825 10.093 < 0.0000000000000002 ***
## zone9 1.36044590 0.07962648 17.085 < 0.0000000000000002 ***
## zone11 0.05481168 0.10141625 0.540 0.588878
## zone12 -0.24177778 0.09350028 -2.586 0.009714 **
## zone13 0.36667792 0.11207328 3.272 0.001069 **
## zone14 -0.14945135 0.09826357 -1.521 0.128279
## zone16 -0.94110617 0.30475586 -3.088 0.002015 **
## zone17 -0.27910981 0.37074139 -0.753 0.451545
## count0_1 -10.20349183 72.46288909 -0.141 0.888020
## count0_2 -10.64342382 72.46290162 -0.147 0.883226
## count1_0 -9.89620629 72.46287376 -0.137 0.891371
## count1_1 -10.34154275 72.46289181 -0.143 0.886515
## count1_2 -10.58996475 72.46289249 -0.146 0.883808
## count2_0 -10.00139783 72.46290371 -0.138 0.890224
## count2_1 -10.47433734 72.46290143 -0.145 0.885068
## count2_2 -10.78048270 72.46289400 -0.149 0.881733
## count3_0 -8.90730217 72.46293470 -0.123 0.902169
## count3_1 -10.49705741 72.46291931 -0.145 0.884821
## count3_2 -11.05790860 72.46290199 -0.153 0.878713
## pfx_total 0.35263722 0.12021641 2.933 0.003353 **
## speed_change 0.01966765 0.01396969 1.408 0.159167
## break_change 0.88355297 0.11146971 7.926 0.00000000000000226 ***
## distance 0.78333765 0.11647205 6.726 0.00000000001749404 ***
## prev_pitchFastball 0.06055842 0.03134423 1.932 0.053354 .
## prev_pitchNone -9.52896492 72.46288246 -0.132 0.895379
## prev_pitchOff Speed 0.06842237 0.06020190 1.137 0.255727
## release_spin_rate 0.00034884 0.00007543 4.625 0.00000375390785099 ***
## I(plate_x^2) -1.33742770 0.06047928 -22.114 < 0.0000000000000002 ***
## I(plate_z^2) -0.95146455 0.05341929 -17.811 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 52121 on 42647 degrees of freedom
## Residual deviance: 43719 on 42607 degrees of freedom
## (129 observations deleted due to missingness)
## AIC: 43801
##
## Number of Fisher Scoring iterations: 8
# Refined Model
fb_model_strike <- glm(is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + break_change + distance +
release_spin_rate + I(plate_x^2) + I(plate_z^2),
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_strike)
##
## Call:
## glm(formula = is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + break_change + distance +
## release_spin_rate + I(plate_x^2) + I(plate_z^2), family = binomial,
## data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.01959874 0.59247854 -16.911 < 0.0000000000000002 ***
## pitch_speed 0.02377242 0.00499255 4.762 0.000001920851941636 ***
## pfx_x 0.18436160 0.05667941 3.253 0.001143 **
## pfx_z -0.31509822 0.07641856 -4.123 0.000037344943541671 ***
## plate_x 0.37389991 0.04774599 7.831 0.000000000000004839 ***
## plate_z 5.09165838 0.27486149 18.524 < 0.0000000000000002 ***
## zone1 0.12371525 0.08258168 1.498 0.134108
## zone2 -0.02048224 0.06558575 -0.312 0.754815
## zone3 0.43733275 0.07575427 5.773 0.000000007785190804 ***
## zone4 -0.07663348 0.06980848 -1.098 0.272306
## zone6 0.65471683 0.06008547 10.896 < 0.0000000000000002 ***
## zone7 0.94474809 0.08976595 10.525 < 0.0000000000000002 ***
## zone8 0.70972523 0.07005044 10.132 < 0.0000000000000002 ***
## zone9 1.36380395 0.07958839 17.136 < 0.0000000000000002 ***
## zone11 0.05476433 0.10129449 0.541 0.588752
## zone12 -0.24069515 0.09339496 -2.577 0.009961 **
## zone13 0.36887777 0.11199130 3.294 0.000988 ***
## zone14 -0.14452084 0.09818047 -1.472 0.141023
## zone16 -0.94065180 0.30465849 -3.088 0.002018 **
## zone17 -0.27644903 0.37068180 -0.746 0.455797
## count0_1 -0.63099114 0.03972981 -15.882 < 0.0000000000000002 ***
## count0_2 -1.05471515 0.06096876 -17.299 < 0.0000000000000002 ***
## count1_0 -0.32445905 0.04035505 -8.040 0.000000000000000898 ***
## count1_1 -0.77088056 0.04466859 -17.258 < 0.0000000000000002 ***
## count1_2 -1.01267040 0.04930077 -20.541 < 0.0000000000000002 ***
## count2_0 -0.43199652 0.05982086 -7.222 0.000000000000514163 ***
## count2_1 -0.89747292 0.05796989 -15.482 < 0.0000000000000002 ***
## count2_2 -1.20077475 0.05090482 -23.589 < 0.0000000000000002 ***
## count3_0 0.66130927 0.08854101 7.469 0.000000000000080831 ***
## count3_1 -0.91844068 0.07698930 -11.929 < 0.0000000000000002 ***
## count3_2 -1.47085921 0.06132638 -23.984 < 0.0000000000000002 ***
## pfx_total 0.34150808 0.12009437 2.844 0.004460 **
## break_change 0.90602065 0.11095569 8.166 0.000000000000000320 ***
## distance 0.78405900 0.11636185 6.738 0.000000000016045948 ***
## release_spin_rate 0.00035075 0.00007507 4.673 0.000002974568745848 ***
## I(plate_x^2) -1.33736701 0.06042433 -22.133 < 0.0000000000000002 ***
## I(plate_z^2) -0.95346597 0.05338985 -17.859 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 52181 on 42689 degrees of freedom
## Residual deviance: 43777 on 42653 degrees of freedom
## (87 observations deleted due to missingness)
## AIC: 43851
##
## Number of Fisher Scoring iterations: 7
# Model Predictions
fb_preds_strike <- fastballs3 %>%
mutate(prediction_log = predict(fb_model_strike, fastballs3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
ggplot(fb_preds_strike, aes(x = prediction, y = as.numeric(is_strike))) +
geom_point(alpha = 0.01) +
geom_smooth() +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_bw()
# Initial Model
sl_model_all <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance +
prev_pitch + speed_fb_diff + pfx_x_fb_diff + pfx_z_fb_diff + release_spin_rate +
I(plate_x^2) + I(plate_z^2),
data = sliders3, family = binomial)
# Model Evaluation
# summary(sl_model_all)
sl_model <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
pfx_total + speed_change + break_change + distance + release_spin_rate +
prev_pitch + speed_fb_diff + I(plate_x^2) + I(plate_z^2),
data = sliders3, family = binomial)
sl_model %>% summary
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + pfx_total + speed_change + break_change +
## distance + release_spin_rate + prev_pitch + speed_fb_diff +
## I(plate_x^2) + I(plate_z^2), family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.21621327 0.92873085 -8.847 < 0.0000000000000002 ***
## pitch_speed 0.01379833 0.00950387 1.452 0.146539
## pfx_x 0.25569580 0.13139319 1.946 0.051650 .
## pfx_z 0.42842356 0.08683887 4.934 0.0000008074996 ***
## plate_x 0.74531223 0.12539349 5.944 0.0000000027851 ***
## plate_z 3.09714611 0.28408297 10.902 < 0.0000000000000002 ***
## zone1 0.38978765 0.25859110 1.507 0.131721
## zone2 0.44246683 0.18335350 2.413 0.015814 *
## zone3 -0.19221608 0.20819683 -0.923 0.355881
## zone4 -0.70481946 0.26992612 -2.611 0.009024 **
## zone6 -0.27504771 0.14374089 -1.913 0.055684 .
## zone7 -0.16599387 0.24143054 -0.688 0.491741
## zone8 0.13680416 0.14367804 0.952 0.341017
## zone9 0.18917878 0.14742171 1.283 0.199405
## zone11 0.73602843 0.30001116 2.453 0.014154 *
## zone12 -0.49401425 0.20861735 -2.368 0.017883 *
## zone13 0.28805261 0.21217246 1.358 0.174580
## zone14 0.35478445 0.16987034 2.089 0.036747 *
## zone16 -0.50459713 1.04806302 -0.481 0.630192
## zone17 -0.43662410 0.21459306 -2.035 0.041885 *
## pfx_total -0.63699864 0.14815625 -4.300 0.0000171179534 ***
## speed_change 0.06769027 0.02117828 3.196 0.001392 **
## break_change 0.49887128 0.15124336 3.298 0.000972 ***
## distance 1.40199266 0.16784357 8.353 < 0.0000000000000002 ***
## release_spin_rate 0.00033421 0.00009603 3.480 0.000501 ***
## prev_pitchFastball -0.17559463 0.04455425 -3.941 0.0000810946499 ***
## prev_pitchNone -0.35137282 0.05301926 -6.627 0.0000000000342 ***
## prev_pitchOff Speed -0.07831449 0.09720548 -0.806 0.420439
## speed_fb_diff -0.06485859 0.01173577 -5.527 0.0000000326550 ***
## I(plate_x^2) -1.00618680 0.07828916 -12.852 < 0.0000000000000002 ***
## I(plate_z^2) -0.72377372 0.06392984 -11.321 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19130 on 22074 degrees of freedom
## Residual deviance: 17494 on 22044 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 17556
##
## Number of Fisher Scoring iterations: 7
# Model Predictions
sl_preds <- sliders3 %>%
mutate(prediction_log = predict(sl_model, sliders3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
sl_preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(n = n(), whiff_rate = mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = whiff_rate)) +
geom_point(aes(size = n)) + # size of bin shown on graph
coord_fixed() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Slider Whiff Model Prediction",
subtitle = "Whiff proportion by predicted whiff value",
caption = "Whiff predictions have a 1% bin width")
# Initial Model
fb_model_all <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + speed_change + break_change + distance + release_spin_rate +
prev_pitch + pitch_type + I(plate_x^2) + I(plate_z^2),
data = fastballs3, family = binomial)
# Model Evaluation
# summary(fb_model_all)
fb_model <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
pfx_total + speed_change + break_change + release_spin_rate +
pitch_type + I(plate_x^2) + I(plate_z^2),
data = fastballs3, family = binomial)
fb_model %>% summary
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + pfx_total + speed_change + break_change +
## release_spin_rate + pitch_type + I(plate_x^2) + I(plate_z^2),
## family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.0432092 0.8231173 -15.846 < 0.0000000000000002 ***
## pitch_speed 0.0672112 0.0080667 8.332 < 0.0000000000000002 ***
## pfx_x 0.6257666 0.0859477 7.281 0.000000000000331884 ***
## pfx_z -0.7427616 0.1416365 -5.244 0.000000157013988062 ***
## plate_x -0.0568960 0.0632614 -0.899 0.368450
## plate_z 2.5341029 0.2336575 10.845 < 0.0000000000000002 ***
## zone1 0.1398252 0.0979558 1.427 0.153456
## zone2 0.1480009 0.0807817 1.832 0.066935 .
## zone3 0.3149557 0.0915465 3.440 0.000581 ***
## zone4 -0.3834776 0.1103761 -3.474 0.000512 ***
## zone6 0.2138205 0.0848359 2.520 0.011722 *
## zone7 0.3424103 0.1430845 2.393 0.016708 *
## zone8 -0.0670987 0.1210132 -0.554 0.579254
## zone9 -0.0487777 0.1264578 -0.386 0.699702
## zone11 0.2660080 0.1109173 2.398 0.016473 *
## zone12 0.0303559 0.1081706 0.281 0.778995
## zone13 1.1223798 0.1396766 8.036 0.000000000000000932 ***
## zone14 0.4616576 0.1337223 3.452 0.000556 ***
## zone16 -1.9920773 0.3330034 -5.982 0.000000002202072494 ***
## zone17 1.2271317 0.4080727 3.007 0.002637 **
## pfx_total 0.6062551 0.1898086 3.194 0.001403 **
## speed_change 0.1275708 0.0204957 6.224 0.000000000483784410 ***
## break_change 1.3238868 0.1695934 7.806 0.000000000000005892 ***
## release_spin_rate 0.0005245 0.0001209 4.337 0.000014437748864811 ***
## pitch_typeFF -0.4616070 0.1131537 -4.079 0.000045138488337887 ***
## pitch_typeSI -0.7771742 0.1311966 -5.924 0.000000003147059216 ***
## I(plate_x^2) -0.6472930 0.0514652 -12.577 < 0.0000000000000002 ***
## I(plate_z^2) -0.3369572 0.0394618 -8.539 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25315 on 42647 degrees of freedom
## Residual deviance: 23307 on 42620 degrees of freedom
## (129 observations deleted due to missingness)
## AIC: 23363
##
## Number of Fisher Scoring iterations: 7
# Model Predictions
fb_preds <- fastballs3 %>%
mutate(prediction_log = predict(fb_model, fastballs3),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
fb_preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(n = n(), whiff_rate = mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = whiff_rate)) +
geom_point(aes(size = n)) + # size of bin shown on graph
coord_fixed() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Fastball Whiff Model Prediction",
subtitle = "Whiff proportion by predicted whiff value",
caption = "Whiff predictions have a 1% bin width")
## Intercept-Only Models
# Slider
sl_model_whiff_int <- glm(whiff ~ 1,
data = sliders, family = binomial)
sl_whiff_int <- augment(sl_model_whiff_int)
sl_whiff_int <- sl_whiff_int %>%
mutate(prediction_log = predict(sl_model_whiff_int, sl_whiff_int),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "whiff",
pitch = "slider",
type = "intercept only")
# Fastball
fb_model_whiff_int <- glm(whiff ~ 1,
data = fastballs, family = binomial)
fb_whiff_int <- augment(fb_model_whiff_int)
fb_whiff_int <- fb_whiff_int %>%
mutate(prediction_log = predict(fb_model_whiff_int, fb_whiff_int),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "whiff",
pitch = "fastball",
type = "intercept only")
## Zone-Only Models
# Slider
sl_model_whiff_zone <- glm(whiff ~ zone,
data = sliders3, family = binomial)
sl_whiff_zone <- augment(sl_model_whiff_zone)
sl_whiff_zone <- sl_whiff_zone %>%
mutate(prediction_log = predict(sl_model_whiff_zone, sl_whiff_zone),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "whiff",
pitch = "slider",
type = "zone only")
# Fastball
fb_model_whiff_zone <- glm(whiff ~ zone,
data = fastballs3, family = binomial)
fb_whiff_zone <- augment(fb_model_whiff_zone)
fb_whiff_zone <- fb_whiff_zone %>%
mutate(prediction_log = predict(fb_model_whiff_zone, fb_whiff_zone),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "whiff",
pitch = "fastball",
type = "zone only")
# Refined Model
sl_model_whiff_final <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
pfx_total + speed_change + break_change + prev_pitch +
speed_fb_diff + release_spin_rate +
I(plate_z^2) + pred_bwhiff,
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_whiff_final)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + pfx_total + speed_change + break_change +
## prev_pitch + speed_fb_diff + release_spin_rate + I(plate_z^2) +
## pred_bwhiff, family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.80878344 0.89007871 -9.897 < 0.0000000000000002 ***
## pitch_speed 0.02326646 0.00959716 2.424 0.015338 *
## pfx_x 0.31131325 0.13220977 2.355 0.018538 *
## pfx_z 0.45948159 0.08749418 5.252 0.000000150810218 ***
## plate_x -0.23247981 0.05998751 -3.875 0.000106 ***
## plate_z 0.95888391 0.15172625 6.320 0.000000000261854 ***
## zone1 0.37881051 0.23170680 1.635 0.102076
## zone2 0.95725352 0.17340920 5.520 0.000000033861350 ***
## zone3 0.75641326 0.19305927 3.918 0.000089273216336 ***
## zone4 -1.02818860 0.25655721 -4.008 0.000061328858026 ***
## zone6 0.43121058 0.13343144 3.232 0.001231 **
## zone7 -0.31786948 0.22186265 -1.433 0.151935
## zone8 0.53994511 0.13473803 4.007 0.000061398738835 ***
## zone9 1.05378145 0.12686375 8.306 < 0.0000000000000002 ***
## zone11 0.54034473 0.22823323 2.368 0.017908 *
## zone12 0.69425148 0.18075310 3.841 0.000123 ***
## zone13 0.40701650 0.17620733 2.310 0.020895 *
## zone14 1.38196829 0.13705675 10.083 < 0.0000000000000002 ***
## zone16 -0.64943786 1.02834402 -0.632 0.527689
## zone17 0.09380503 0.19185864 0.489 0.624893
## pfx_total -0.65653639 0.14906745 -4.404 0.000010613050527 ***
## speed_change 0.05394588 0.02136534 2.525 0.011572 *
## break_change 0.47182090 0.15239170 3.096 0.001961 **
## prev_pitchFastball -0.14097266 0.04493446 -3.137 0.001705 **
## prev_pitchNone -0.34475032 0.05348009 -6.446 0.000000000114591 ***
## prev_pitchOff Speed -0.00889866 0.09788078 -0.091 0.927561
## speed_fb_diff -0.06462824 0.01186354 -5.448 0.000000051043876 ***
## release_spin_rate 0.00037183 0.00009683 3.840 0.000123 ***
## I(plate_z^2) -0.32429647 0.04362696 -7.433 0.000000000000106 ***
## pred_bwhiff 18.52544050 0.93034175 19.913 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19130 on 22074 degrees of freedom
## Residual deviance: 17290 on 22045 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 17350
##
## Number of Fisher Scoring iterations: 7
sl_whiff <- augment(sl_model_whiff_final)
sl_whiff <- sl_whiff %>%
mutate(prediction_log = predict(sl_model_whiff_final, sl_whiff),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "whiff",
pitch = "slider")
# Refined Model
fb_model_whiff_final <- glm(whiff ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
pfx_total + speed_change + break_change + prev_pitch +
release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bwhiff,
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_whiff_final)
##
## Call:
## glm(formula = whiff ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + pfx_total + speed_change + break_change +
## prev_pitch + release_spin_rate + I(plate_x^2) + I(plate_z^2) +
## pred_bwhiff, family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.5919383 0.8088909 -16.803 < 0.0000000000000002 ***
## pitch_speed 0.0552552 0.0078549 7.034 0.00000000000199988 ***
## pfx_x 0.6349090 0.0806151 7.876 0.00000000000000339 ***
## pfx_z -1.1544701 0.1172599 -9.845 < 0.0000000000000002 ***
## plate_x -0.0572908 0.0635927 -0.901 0.367640
## plate_z 2.5729441 0.2358447 10.909 < 0.0000000000000002 ***
## zone1 0.1997184 0.0992443 2.012 0.044179 *
## zone2 0.2019836 0.0818785 2.467 0.013630 *
## zone3 0.3983497 0.0926473 4.300 0.00001710771423435 ***
## zone4 -0.4026278 0.1113651 -3.615 0.000300 ***
## zone6 0.2272683 0.0857170 2.651 0.008016 **
## zone7 0.2286930 0.1445253 1.582 0.113564
## zone8 -0.1368806 0.1218096 -1.124 0.261129
## zone9 -0.1079255 0.1272134 -0.848 0.396226
## zone11 0.4043838 0.1124988 3.595 0.000325 ***
## zone12 0.1648698 0.1096437 1.504 0.132662
## zone13 1.0843782 0.1409670 7.692 0.00000000000001444 ***
## zone14 0.4538287 0.1346351 3.371 0.000749 ***
## zone16 -1.7173247 0.3354893 -5.119 0.00000030737820315 ***
## zone17 1.2437602 0.4090633 3.041 0.002362 **
## pfx_total 0.8267996 0.1821178 4.540 0.00000562765769026 ***
## speed_change 0.0995532 0.0209438 4.753 0.00000200062697723 ***
## break_change 1.5060658 0.1686072 8.932 < 0.0000000000000002 ***
## prev_pitchFastball 0.0326755 0.0433306 0.754 0.450791
## prev_pitchNone -0.3730195 0.0522608 -7.138 0.00000000000094935 ***
## prev_pitchOff Speed 0.0282182 0.0841411 0.335 0.737348
## release_spin_rate 0.0005119 0.0001151 4.446 0.00000875563799586 ***
## I(plate_x^2) -0.6924502 0.0522011 -13.265 < 0.0000000000000002 ***
## I(plate_z^2) -0.3657328 0.0399723 -9.150 < 0.0000000000000002 ***
## pred_bwhiff 17.4420990 0.6854232 25.447 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25315 on 42647 degrees of freedom
## Residual deviance: 22625 on 42618 degrees of freedom
## (129 observations deleted due to missingness)
## AIC: 22685
##
## Number of Fisher Scoring iterations: 7
fb_whiff <- augment(fb_model_whiff_final)
fb_whiff <- fb_whiff %>%
mutate(prediction_log = predict(fb_model_whiff_final, fb_whiff),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "whiff",
pitch = "fastball")
# Refined Model
sl_model_barrel_final <- glm(is_barrel ~ pitch_speed + plate_z + plate_x +
prev_pitch + speed_fb_diff + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_barrel_final)
##
## Call:
## glm(formula = is_barrel ~ pitch_speed + plate_z + plate_x + prev_pitch +
## speed_fb_diff + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
## family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.18572 2.98319 -3.079 0.00208 **
## pitch_speed -0.08777 0.02815 -3.118 0.00182 **
## plate_z 10.15457 1.23634 8.213 < 0.0000000000000002 ***
## plate_x -0.74899 0.17537 -4.271 0.0000194594585839 ***
## prev_pitchFastball 0.11413 0.15020 0.760 0.44735
## prev_pitchNone -0.43378 0.17970 -2.414 0.01578 *
## prev_pitchOff Speed 0.14818 0.31193 0.475 0.63475
## speed_fb_diff 0.08635 0.03560 2.426 0.01528 *
## I(plate_x^2) -2.32959 0.30838 -7.554 0.0000000000000421 ***
## I(plate_z^2) -2.07409 0.26002 -7.977 0.0000000000000015 ***
## pred_bbarrel 159.51045 10.94557 14.573 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2844.6 on 22092 degrees of freedom
## Residual deviance: 2151.8 on 22082 degrees of freedom
## (13 observations deleted due to missingness)
## AIC: 2173.8
##
## Number of Fisher Scoring iterations: 10
sl_barrel <- augment(sl_model_barrel_final)
sl_barrel <- sl_barrel %>%
mutate(prediction_log = predict(sl_model_barrel_final, sl_barrel),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "barrel",
pitch = "slider")
# Refined Model
fb_model_barrel_final <- glm(is_barrel ~ pitch_speed + pfx_z + plate_z +
break_change + prev_pitch + plate_x +
release_spin_rate + I(plate_x^2) + I(plate_z^2) + pred_bbarrel,
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_barrel_final)
##
## Call:
## glm(formula = is_barrel ~ pitch_speed + pfx_z + plate_z + break_change +
## prev_pitch + plate_x + release_spin_rate + I(plate_x^2) +
## I(plate_z^2) + pred_bbarrel, family = binomial, data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.3069933 1.6651535 -2.587 0.00969 **
## pitch_speed -0.0948534 0.0157780 -6.012 0.00000000184 ***
## pfx_z 0.2534667 0.1145416 2.213 0.02691 *
## plate_z 7.3962106 0.6199938 11.929 < 0.0000000000000002 ***
## break_change -1.0515577 0.3287176 -3.199 0.00138 **
## prev_pitchFastball 0.0390869 0.0941106 0.415 0.67790
## prev_pitchNone -0.5136715 0.1123902 -4.570 0.00000486723 ***
## prev_pitchOff Speed -0.0681368 0.1804846 -0.378 0.70579
## plate_x -0.4714559 0.0878082 -5.369 0.00000007911 ***
## release_spin_rate -0.0007390 0.0002309 -3.200 0.00138 **
## I(plate_x^2) -1.8263205 0.1411662 -12.937 < 0.0000000000000002 ***
## I(plate_z^2) -1.4156005 0.1153409 -12.273 < 0.0000000000000002 ***
## pred_bbarrel 107.5975098 5.7236975 18.799 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7339.0 on 42689 degrees of freedom
## Residual deviance: 6260.6 on 42677 degrees of freedom
## (87 observations deleted due to missingness)
## AIC: 6286.6
##
## Number of Fisher Scoring iterations: 9
fb_barrel <- augment(fb_model_barrel_final)
fb_barrel <- fb_barrel %>%
mutate(prediction_log = predict(fb_model_barrel_final, fb_barrel),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "barrel",
pitch = "fastball")
# No Zone in Fastball?
summarize(fastballs3, b = mean(is_barrel), d = mean(distance), .by = zone) %>%
arrange(desc(b))
## # A tibble: 15 × 3
## zone b d
## <fct> <dbl> <dbl>
## 1 5 0.0418 0.234
## 2 4 0.0401 0.557
## 3 2 0.0321 0.668
## 4 6 0.0266 0.571
## 5 7 0.0260 0.843
## 6 8 0.0205 0.648
## 7 3 0.0183 0.835
## 8 1 0.0176 0.856
## 9 9 0.00881 0.861
## 10 13 0.00777 1.31
## 11 12 0.00744 1.34
## 12 11 0.00578 1.40
## 13 14 0 1.37
## 14 17 0 2.30
## 15 16 0 2.36
summarize(fastballs3, b = mean(is_barrel), d = mean(distance), .by = zone) %>%
arrange(desc(b)) %>%
with(cor(d, b))
## [1] -0.8422974
# Refined Model
sliders3 <- sliders3 %>%
mutate(fb_prev = ifelse(prev_pitch == "Fastball", 1, 0)) %>%
mutate(count_s = case_when(count %in% c("0_0", "1_1", "2_2") ~ "Even",
count %in% c("1_0", "2_1", "2_0", "3_0", "3_1") ~ "Hitter",
count %in% c("0_1", "0_2", "1_2") ~ "Pitcher",
count == "3_2" ~ "Full"))
sl_model_strike_final <- glm(is_strike ~ pfx_x + pfx_z + plate_x + plate_z + zone +
count_s + pfx_total + break_change + distance + prev_pitch +
speed_fb_diff + release_spin_rate +
I(plate_x^2) + I(plate_z^2),
data = sliders3, family = binomial)
# Model Evaluation
summary(sl_model_strike_final)
##
## Call:
## glm(formula = is_strike ~ pfx_x + pfx_z + plate_x + plate_z +
## zone + count_s + pfx_total + break_change + distance + prev_pitch +
## speed_fb_diff + release_spin_rate + I(plate_x^2) + I(plate_z^2),
## family = binomial, data = sliders3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.95025380 0.37550196 -13.183 < 0.0000000000000002 ***
## pfx_x 0.23839221 0.10328061 2.308 0.02099 *
## pfx_z 0.18491137 0.06590690 2.806 0.00502 **
## plate_x 0.46386633 0.09033130 5.135 0.0000002818933587 ***
## plate_z 2.45803270 0.22543449 10.904 < 0.0000000000000002 ***
## zone1 1.19268424 0.15378690 7.755 0.0000000000000088 ***
## zone2 0.66944751 0.11694487 5.724 0.0000000103756699 ***
## zone3 0.55258438 0.13153879 4.201 0.0000265828442377 ***
## zone4 0.49157300 0.12118791 4.056 0.0000498588881342 ***
## zone6 -0.04426440 0.09169664 -0.483 0.62929
## zone7 0.47195408 0.14588479 3.235 0.00122 **
## zone8 0.05236311 0.09726019 0.538 0.59031
## zone9 0.13024189 0.10397632 1.253 0.21035
## zone11 -0.45938264 0.21207454 -2.166 0.03030 *
## zone12 -0.44645176 0.14612246 -3.055 0.00225 **
## zone13 -0.48725550 0.16628431 -2.930 0.00339 **
## zone14 -0.16279605 0.12661978 -1.286 0.19855
## zone16 -1.80640227 1.03557477 -1.744 0.08110 .
## zone17 -0.26112200 0.17099826 -1.527 0.12675
## count_sFull -0.15783734 0.09108560 -1.733 0.08312 .
## count_sHitter 0.46310520 0.05118051 9.048 < 0.0000000000000002 ***
## count_sPitcher -0.09325065 0.04506279 -2.069 0.03851 *
## pfx_total -0.51880950 0.11441163 -4.535 0.0000057716281996 ***
## break_change 0.28892243 0.11733979 2.462 0.01381 *
## distance 0.83850669 0.13257275 6.325 0.0000000002534302 ***
## prev_pitchFastball -0.14942047 0.03734628 -4.001 0.0000630896086984 ***
## prev_pitchNone 0.59426251 0.05044210 11.781 < 0.0000000000000002 ***
## prev_pitchOff Speed -0.07509416 0.08330675 -0.901 0.36737
## speed_fb_diff -0.05304525 0.00805446 -6.586 0.0000000000452374 ***
## release_spin_rate 0.00037446 0.00007346 5.097 0.0000003445692910 ***
## I(plate_x^2) -0.90294934 0.06193532 -14.579 < 0.0000000000000002 ***
## I(plate_z^2) -0.56083909 0.04890608 -11.468 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27338 on 22074 degrees of freedom
## Residual deviance: 24773 on 22043 degrees of freedom
## (31 observations deleted due to missingness)
## AIC: 24837
##
## Number of Fisher Scoring iterations: 7
sl_strike <- augment(sl_model_strike_final)
sl_strike <- sl_strike %>%
mutate(prediction_log = predict(sl_model_strike_final, sl_strike),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "strike",
pitch = "slider")
# Refined Model
fb_model_strike_final <- glm(is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x + plate_z + zone +
count + pfx_total + break_change + distance +
release_spin_rate + I(plate_x^2) + I(plate_z^2),
data = fastballs3, family = binomial)
# Model Evaluation
summary(fb_model_strike_final)
##
## Call:
## glm(formula = is_strike ~ pitch_speed + pfx_x + pfx_z + plate_x +
## plate_z + zone + count + pfx_total + break_change + distance +
## release_spin_rate + I(plate_x^2) + I(plate_z^2), family = binomial,
## data = fastballs3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.01959874 0.59247854 -16.911 < 0.0000000000000002 ***
## pitch_speed 0.02377242 0.00499255 4.762 0.000001920851941636 ***
## pfx_x 0.18436160 0.05667941 3.253 0.001143 **
## pfx_z -0.31509822 0.07641856 -4.123 0.000037344943541671 ***
## plate_x 0.37389991 0.04774599 7.831 0.000000000000004839 ***
## plate_z 5.09165838 0.27486149 18.524 < 0.0000000000000002 ***
## zone1 0.12371525 0.08258168 1.498 0.134108
## zone2 -0.02048224 0.06558575 -0.312 0.754815
## zone3 0.43733275 0.07575427 5.773 0.000000007785190804 ***
## zone4 -0.07663348 0.06980848 -1.098 0.272306
## zone6 0.65471683 0.06008547 10.896 < 0.0000000000000002 ***
## zone7 0.94474809 0.08976595 10.525 < 0.0000000000000002 ***
## zone8 0.70972523 0.07005044 10.132 < 0.0000000000000002 ***
## zone9 1.36380395 0.07958839 17.136 < 0.0000000000000002 ***
## zone11 0.05476433 0.10129449 0.541 0.588752
## zone12 -0.24069515 0.09339496 -2.577 0.009961 **
## zone13 0.36887777 0.11199130 3.294 0.000988 ***
## zone14 -0.14452084 0.09818047 -1.472 0.141023
## zone16 -0.94065180 0.30465849 -3.088 0.002018 **
## zone17 -0.27644903 0.37068180 -0.746 0.455797
## count0_1 -0.63099114 0.03972981 -15.882 < 0.0000000000000002 ***
## count0_2 -1.05471515 0.06096876 -17.299 < 0.0000000000000002 ***
## count1_0 -0.32445905 0.04035505 -8.040 0.000000000000000898 ***
## count1_1 -0.77088056 0.04466859 -17.258 < 0.0000000000000002 ***
## count1_2 -1.01267040 0.04930077 -20.541 < 0.0000000000000002 ***
## count2_0 -0.43199652 0.05982086 -7.222 0.000000000000514163 ***
## count2_1 -0.89747292 0.05796989 -15.482 < 0.0000000000000002 ***
## count2_2 -1.20077475 0.05090482 -23.589 < 0.0000000000000002 ***
## count3_0 0.66130927 0.08854101 7.469 0.000000000000080831 ***
## count3_1 -0.91844068 0.07698930 -11.929 < 0.0000000000000002 ***
## count3_2 -1.47085921 0.06132638 -23.984 < 0.0000000000000002 ***
## pfx_total 0.34150808 0.12009437 2.844 0.004460 **
## break_change 0.90602065 0.11095569 8.166 0.000000000000000320 ***
## distance 0.78405900 0.11636185 6.738 0.000000000016045948 ***
## release_spin_rate 0.00035075 0.00007507 4.673 0.000002974568745848 ***
## I(plate_x^2) -1.33736701 0.06042433 -22.133 < 0.0000000000000002 ***
## I(plate_z^2) -0.95346597 0.05338985 -17.859 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 52181 on 42689 degrees of freedom
## Residual deviance: 43777 on 42653 degrees of freedom
## (87 observations deleted due to missingness)
## AIC: 43851
##
## Number of Fisher Scoring iterations: 7
fb_strike <- augment(fb_model_strike_final)
fb_strike <- fb_strike %>%
mutate(prediction_log = predict(fb_model_strike_final, fb_strike),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction)) %>%
mutate(Response = "strike",
pitch = "fastball")
all_models_data <- bind_rows(
sl_whiff, fb_whiff,
sl_barrel, fb_barrel,
sl_strike, fb_strike,
sl_whiff_int, fb_whiff_int,
sl_whiff_zone, fb_whiff_zone) %>%
select(Response:pitch, type, whiff:pred_bwhiff, `I(plate_x^2)`:count, `.fitted`:rounded_pred)
all_models <- bind_rows(
tidy(sl_model_whiff) %>% mutate(Response = "whiff", pitch = "slider", type = "complex"),
tidy(fb_model_whiff) %>% mutate(Response = "whiff", pitch = "fastball", type = "complex"),
tidy(sl_model_barrel) %>% mutate(Response = "barrel", pitch = "slider", type = "complex"),
tidy(fb_model_barrel) %>% mutate(Response = "barrel", pitch = "fastball", type = "complex"),
tidy(sl_model_strike) %>% mutate(Response = "strike", pitch = "slider", type = "complex"),
tidy(fb_model_strike) %>% mutate(Response = "strike", pitch = "fastball", type = "complex"),
tidy(sl_model_whiff_int) %>% mutate(Response = "whiff", pitch = "slider",
type = "intercept only"),
tidy(fb_model_whiff_int) %>% mutate(Response = "whiff", pitch = "fastball",
type = "intercept only"),
tidy(sl_model_whiff_zone) %>% mutate(Response = "whiff", pitch = "slider",
type = "zone only"),
tidy(fb_model_whiff_zone) %>% mutate(Response = "whiff", pitch = "fastball",
type = "zone only")
)
all_models_data$type <- all_models_data$type %>%
replace_na("complex")
# P-Value
all_models %>%
filter(type == "complex") %>%
mutate(model_id = paste0(Response, pitch, type)) %>%
group_by(model_id) %>%
ggplot(aes(x = Response, y = term, fill = p.value)) +
geom_tile() +
facet_wrap(~ pitch) +
scale_fill_gradient(limits = c(0, 0.05),
low = "green", high = "yellow", na.value = "lightgray") +
theme_classic() +
NULL
# Estimate
all_models %>%
filter(type == "complex") %>%
mutate(model_id = paste0(Response, pitch, type)) %>%
group_by(model_id) %>%
ggplot(aes(x = Response, y = term, fill = estimate)) +
geom_tile() +
facet_wrap(~ pitch) +
scale_fill_brewer(type = "div") +
scale_fill_gradient2(low = "red", high = "green", mid = "white", midpoint = 0,
limits = c(-10, 10)) +
theme_classic() +
NULL
# Log Estimate
all_models %>%
filter(type == "complex") %>%
mutate(log_estimate = log(abs(estimate))*(estimate/abs(estimate))) %>%
mutate(model_id = paste0(Response, pitch, type)) %>%
group_by(model_id) %>%
ggplot(aes(x = Response, y = term, fill = log_estimate)) +
geom_tile() +
facet_wrap(~ pitch) +
scale_fill_brewer(type = "div") +
scale_fill_gradient2(low = "red", high = "green", mid = "white", midpoint = 0) +
theme_classic() +
NULL
# all_pitches %>%
# mutate(pitch_speed = round(pitch_speed, 0)) %>%
# group_by(p_throws, hitter, pitch_type, pitch_speed) %>%
# filter(!is.na(pfx_x)) %>%
# filter(!is.na(pfx_z)) %>%
# summarize(min_x = min(pfx_x), max_x = max(pfx_x),
# min_z = min(pfx_z), max_z = max(pfx_z)) %>%
# write.csv("Movement.csv")
# write.csv(all_models, "models1.csv")